# polynoimial-regression/polynomial_regression.py

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas.io.formats.format import _trim_zeros_single_float

# Load the samples from the "x" and "y" columns of the CSV file.
data = pd.read_csv("test_data.csv")
# Both arrays stay flat (1-D). ndarray.reshape returns a new array instead of
# reshaping in place, so a bare `x.reshape(-1, 1)` statement has no effect;
# no reshape is applied here, and the training code indexes y[i] as a scalar.
x = data['x'].to_numpy()
y = data['y'].to_numpy()

#Generates a matrix of x_n = x^n, treating each x_n as an independent feature
def poly_features(features, x):
    """Build the polynomial design matrix ``[x**0, x**1, ..., x**features]``.

    Args:
        features: Highest power to generate. The result has
            ``features + 1`` columns because the constant ``x**0`` column
            is included.
        x: Sample values, either a 1-D array-like or an ``(n, 1)`` column.

    Returns:
        A float array of shape ``(n, features + 1)`` whose column ``i``
        holds ``x**i``.
    """
    # Flatten so both 1-D input and (n, 1) columns are handled the same way.
    samples = np.asarray(x).reshape(-1)
    # Fill a float array directly instead of going through a DataFrame and
    # Python lists; the output dtype is float regardless of the input dtype.
    x_poly = np.empty((samples.shape[0], features + 1))
    for power in range(features + 1):
        x_poly[:, power] = samples ** power
    return x_poly

#Splits the data into test and train
def split_data(x, y, test_size = 0.2, random_state = 0):
    """Shuffle the samples and partition them into train and test subsets.

    The global NumPy random generator is seeded with ``random_state`` before
    the shuffle, so the split is reproducible (and the global generator is
    left in that seeded state afterwards). The first
    ``int(x.shape[0] * test_size)`` shuffled positions form the test subset;
    the remaining positions form the training subset.

    Returns:
        ``(x_train, x_test, y_train, y_test)``
    """
    np.random.seed(random_state)
    shuffled = np.random.permutation(len(x))

    n_test = int(x.shape[0] * test_size)
    held_out = shuffled[:n_test]
    kept = shuffled[n_test:]

    x_train, y_train = x[kept], y[kept]
    x_test, y_test = x[held_out], y[held_out]
    return x_train, x_test, y_train, y_test

class polynomialRegression:
    """Linear-in-weights regression fitted with per-sample gradient descent.

    A prediction is the dot product of a weight vector with one row of
    features (for polynomial regression, e.g. the powers 1, x, x^2, ...).
    The class keeps no state: weights are passed in and returned explicitly.
    """

    def predict(self, weights, x_sample):
        """Return the model output ``w_0 * x_0 + w_1 * x_1 + ...``.

        ``x_sample`` may be a single feature row (a scalar is returned) or a
        2-D matrix of rows (one prediction per row is returned).
        """
        return np.dot(x_sample, weights)

    def forward(self, x, y, w):
        """Predict for ``x`` and score the prediction against ``y``.

        Returns:
            ``(loss, y_pred)`` where ``loss`` is the halved squared error
            ``(y_pred - y) ** 2 / 2``. The factor 1/2 makes the gradient
            with respect to ``w`` simply ``(y_pred - y) * x``.
        """
        y_pred = self.predict(w, x)
        loss = ((y_pred - y) ** 2) / 2
        return loss, y_pred

    def update_weights(self, x, y_pred, y_true, w, learning_rate):
        """Apply one gradient-descent step for a single sample.

        The step is ``learning_rate`` times the derivative of the halved
        squared error with respect to ``w``, i.e. ``(y_pred - y_true) * x``.
        A NumPy ``w`` is modified in place; the updated weights are returned.
        """
        w -= learning_rate * ((y_pred - y_true) * x)
        return w

    def train(self, x, y, epochs = 10, learning_rate = 0.001, random_state = 0):
        """Fit the weights by stochastic gradient descent.

        Args:
            x: Feature matrix of shape ``(num_rows, num_cols)``.
            y: Targets, one per row of ``x``.
            epochs: Number of full passes over the training rows.
            learning_rate: Step size of each per-sample update.
            random_state: Seed for the weight initialisation and for the
                per-epoch shuffling of the rows.

        Returns:
            ``(w, train_loss)``: the fitted weights and a list holding the
            summed per-sample loss of each epoch.

        Raises:
            ValueError: If ``x`` has no rows.
        """
        num_rows, num_cols = x.shape
        if num_rows == 0:
            raise ValueError("cannot train on an empty data set")

        print("TRAINING")
        print("--------")

        # One private generator, seeded once: runs are reproducible, each
        # epoch gets a fresh shuffle, and the global NumPy RNG is untouched.
        rng = np.random.RandomState(random_state)

        # Initialize weights
        w = rng.randn(num_cols) / np.sqrt(num_rows)

        train_loss = []
        for epoch in range(epochs):
            cost = 0
            # Visit every training row (not just the first num_cols rows)
            # once per epoch, in a new random order.
            for i in rng.permutation(num_rows):
                loss, y_pred = self.forward(x[i], y[i], w)
                cost += loss
                w = self.update_weights(x[i], y_pred, y[i], w, learning_rate)
            train_loss.append(cost)
            if epoch % 100 == 99:
                print(f"{epoch + 1}: {cost}")
        return w, train_loss

    def test(self, x_test, y_test, w):
        """Evaluate the weights ``w`` on held-out data.

        Returns:
            ``(test_pred, test_loss)``: two lists with the prediction and the
            halved squared error for each row of ``x_test``.
        """
        test_pred = []
        test_loss = []
        # Score the rows that were passed in, not any module-level data.
        for features, target in zip(x_test, y_test):
            loss, y_pred = self.forward(features, target, w)
            test_pred.append(y_pred)
            test_loss.append(loss)
        return test_pred, test_loss

# The polynomial degree is read from standard input (no prompt is printed).
degree = int(input())
x = poly_features(degree, x)

# Hold out part of the data, then report the shape of the training matrix.
x_train, x_test, y_train, y_test = split_data(x, y)
print(x_train.shape)

# Fit the model and report the learned weights and the last epoch's loss.
regressor = polynomialRegression()
weights, train_loss = regressor.train(
    x_train,
    y_train,
    epochs=200000,
    learning_rate=0.00005,
)
print("Weights:", weights)
print("Final Loss:", train_loss[-1])