Deleting Model 1 (model1.py); symbolicRegressor.py now serves as this model.

db3835de · a272-jones · 2a334f4b · 2a334f4b
Commit db3835de authored 1 month ago by a272-jones
--- a/ActualProjectCode/DjangoProject/mlModels/modelName/model1.py
+++ b/ActualProjectCode/DjangoProject/mlModels/modelName/model1.py
-import os
-import pandas as pd
-import numpy as np
-from gplearn.genetic import SymbolicRegressor
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import mean_squared_error, r2_score
-import matplotlib.pyplot as plt
-from sklearn.preprocessing import StandardScaler
-import seaborn as sns
-
-# Load the data
-project_root = os.path.dirname(os.path.dirname(__file__))
-file_path = r"C:\Users\Charlie1\PycharmProjects\shallowsinks\ActualProjectCode\DjangoProject\records\Synthetic_Data_For_Students.csv"
-data = pd.read_csv(file_path)
-
-# Will need to be changed to work with different csv files maybe ask user for their target column?
-target_col = 'SettlementValue'
-X = data.drop(target_col, axis=1)
-y = data[target_col]
-
-# dropping non numeric and nan features
-numeric_columns = X.select_dtypes(include=[np.number]).columns.tolist()
-X = X[numeric_columns]
-print(f"Features used: {len(numeric_columns)} numeric features")
-
-
-data_clean = data.dropna(subset=[*numeric_columns, target_col])
-print(f"Rows after dropping missing values: {data_clean.shape[0]} out of {data.shape[0]} ({data_clean.shape[0]/data.shape[0]*100:.1f}%)")
-
-# Redefine X and y with clean data
-X_clean = data_clean[numeric_columns]
-y_clean = data_clean[target_col]
-
-# Split the data
-X_train, X_test, y_train, y_test = train_test_split(X_clean, y_clean, test_size=0.2, random_state=42)
-
-# Scale the features
-scaler = StandardScaler()
-X_train_scaled = scaler.fit_transform(X_train)
-X_test_scaled = scaler.transform(X_test)
-
-# Configure and training the model
-print("Training the Symbolic Regressor...")
-symbolic_reg = SymbolicRegressor(
-    population_size=5000,
-    generations=20,
-    p_crossover=0.7,
-    p_subtree_mutation=0.1,
-    p_hoist_mutation=0.05,
-    p_point_mutation=0.1,
-    max_samples=0.9,
-    verbose=1,
-    parsimony_coefficient=0.01,
-    random_state=42,
-    function_set=('add', 'sub', 'mul', 'div', 'sqrt', 'log', 'sin', 'cos')
-)
-
-symbolic_reg.fit(X_train_scaled, y_train)
-
-# Make predictions
-y_pred_train = symbolic_reg.predict(X_train_scaled)
-y_pred_test = symbolic_reg.predict(X_test_scaled)
-
-# Evaluate the model
-train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
-test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
-train_r2 = r2_score(y_train, y_pred_train)
-test_r2 = r2_score(y_test, y_pred_test)
-
-print(f"Train RMSE: {train_rmse:.2f}")
-print(f"Test RMSE: {test_rmse:.2f}")
-print(f"Train R² Score: {train_r2:.4f}")
-print(f"Test R² Score: {test_r2:.4f}")
-
-# Display the learned expression
-print("\nBest symbolic expression:")
-print(symbolic_reg._program)
-
-# Plot actual vs predicted values
-plt.figure(figsize=(10, 6))
-plt.scatter(y_test, y_pred_test, alpha=0.5)
-plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
-plt.xlabel('Actual SettlementValue')
-plt.ylabel('Predicted SettlementValue')
-plt.title('Actual vs Predicted Values')
-plt.savefig('symbolic_regression_results.png')
-plt.show()
-
-# Save the model expression to a file
-with open('symbolic_regression_formula.txt', 'w') as f:
-    f.write(str(symbolic_reg._program))
-    f.write('\n\nModel Performance:\n')
-    f.write(f"Train RMSE: {train_rmse:.2f}\n")
-    f.write(f"Test RMSE: {test_rmse:.2f}\n")
-    f.write(f"Train R² Score: {train_r2:.4f}\n")
-    f.write(f"Test R² Score: {test_r2:.4f}\n")
\ No newline at end of file