Add files via upload

2024-07-25 21:09:35 -07:00
parent 9374644bca
commit 27a5ee800a
1 changed files with 93 additions and 0 deletions
--- a/polynomial_regression.py
+++ b/polynomial_regression.py
@@ -0,0 +1,93 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from pandas.io.formats.format import _trim_zeros_single_float
+
+data = pd.read_csv("test_data.csv")
+x = np.asarray(data['x'].values.tolist())
+y = np.asarray(data['y'].values.tolist())
+x.reshape(-1, 1)
+y.reshape(len(y), 1)
+
+#Generates a matrix of x_n = x^n, treating each x_n as an independent feature
+def poly_features(features, x):
+    data = pd.DataFrame(np.zeros((x.shape[0],features + 1)))
+    for i in range(0, features + 1):
+        data.iloc[:, i] = (x ** i).reshape(-1,1)
+    x_poly = np.array(data.values.tolist())
+    return x_poly
+
+#Splits the data into test and train
+def split_data(x, y, test_size = 0.2, random_state = 0):
+    np.random.seed(random_state)
+    indices = np.random.permutation(len(x))
+    data_test_size = int(x.shape[0] * test_size)
+
+    train_indices = indices[data_test_size:]
+    test_indices = indices[:data_test_size]
+
+    return x[train_indices], x[test_indices], y[train_indices], y[test_indices]
+
+class polynomialRegression:
+
+    def predict(self, weights, x_sample):
+        return sum(weights * x_sample) #W_0 * X0 + W_1 * X_1 ...
+
+    def forward(self, x, y, w):
+        y_pred = self.predict(w, x)
+        loss = ((y_pred - y) ** 2) / 2 #Using MSE for error
+        return loss, y_pred
+
+    def update_weights(self, x, y_pred, y_true, w, learning_rate):
+        for i in range(x.shape[0]):
+            #RHS is learning rate times derivative of loss function with respect to w
+            #Trying to get loss' = 0
+            w[i] -= learning_rate * ((y_pred - y_true) * x[i])
+        return w
+
+    def train(self, x, y, epochs = 10, learning_rate = 0.001, random_state = 0):
+
+        print("TRAINING")
+        print("--------")
+
+        #Initialize weights
+        num_rows = x.shape[0]
+        num_cols = x.shape[1]
+        w = np.random.randn(1,num_cols) / np.sqrt(num_rows)
+        w = w[0]
+
+        train_loss = []
+        train_indices = [i for i in range(num_cols)]
+
+        for j in range(epochs):
+            cost = 0
+            np.random.seed(random_state)
+            np.random.shuffle(train_indices)
+            for i in train_indices:
+                loss, y_pred = self.forward(x[i], y[i], w)
+                cost += loss
+                w = self.update_weights(x[i], y_pred, y[i], w, learning_rate)
+            train_loss.append(cost)
+            if j % 100 == 99:
+                print(f"{j + 1}: {cost}")
+        return w, train_loss
+
+    def test(self, x_test, y_test, w):
+
+        #Testing
+        test_pred = []
+        test_loss = []
+        test_indices = [i for i in range(x_test.shape[0])]
+        for i in test_indices:
+            loss, y_pred = self.forward(x[i], y[i], w)
+            test_pred.append(y_pred)
+            test_loss.append(loss)
+        return test_pred, test_loss
+
+x = poly_features(int(input()), x)
+x_train, x_test, y_train, y_test = split_data(x, y)
+print(x_train.shape)
+regressor = polynomialRegression()
+weights, train_loss = regressor.train(x_train, y_train, epochs=200000, learning_rate = 0.00005)
+print("Weights:", weights)
+print("Final Loss:", train_loss[-1])