Skip to content
Snippets Groups Projects
Commit 1fe92b09 authored by Yeyi12's avatar Yeyi12
Browse files

Test

parent dedd403e
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ast
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, KFold
from sklearn.svm import SVC
from scipy.stats import randint
```
%% Cell type:code id: tags:
``` python
# Load the per-run feature-selection summary (one row per run, with the
# run label, its scores and the stringified list of selected features)
# and show it as the cell output.
df = pd.read_csv("Selected_features.csv")
df
```
%% Output
Run Best Correlation Score Best Accuracy Score \
0 Run 1 0.1955 0.9102
1 Run 2 0.2241 0.9136
2 Run 3 0.2480 0.9182
3 Run 4 0.2474 0.9129
4 Run 5 0.2140 0.9122
5 Run 6 0.2195 0.9142
6 Run 7 0.2142 0.9209
7 Run 8 0.2152 0.9156
8 Run 9 0.1906 0.9182
9 Run 10 0.2151 0.9169
Selected Features
0 ['CardiovascularDisease', 'BehavioralProblems'...
1 ['Diabetes', 'BehavioralProblems', 'Confusion'...
2 ['BehavioralProblems', 'Gender', 'MemoryCompla...
3 ['Diabetes', 'BehavioralProblems', 'DietQualit...
4 ['BehavioralProblems', 'FunctionalAssessment',...
5 ['Diabetes', 'BehavioralProblems', 'SleepQuali...
6 ['CardiovascularDisease', 'BehavioralProblems'...
7 ['BehavioralProblems', 'Disorientation', 'Memo...
8 ['Diabetes', 'BehavioralProblems', 'SleepQuali...
9 ['BehavioralProblems', 'DifficultyCompletingTa...
%% Cell type:code id: tags:
``` python
# Fetch the feature list recorded for "Run 3". The CSV stores it as the
# text of a Python list literal, so it is parsed with ast.literal_eval
# (which only evaluates literals, never arbitrary code).
is_run_3 = df["Run"] == "Run 3"
sf = df.loc[is_run_3, "Selected Features"].iloc[0]
sf_list = ast.literal_eval(sf)
sf_list
```
%% Output
['BehavioralProblems',
'Gender',
'MemoryComplaints',
'FunctionalAssessment',
'ADL',
'MMSE']
%% Cell type:code id: tags:
``` python
# Load the full patient-level dataset and show it as the cell output.
main_df = pd.read_csv("alzheimers_disease_data.csv")
main_df
```
%% Output
PatientID Age Gender Ethnicity EducationLevel BMI Smoking \
0 4751 73 0 0 2 22.927749 0
1 4752 89 0 0 0 26.827681 0
2 4753 73 0 3 1 17.795882 0
3 4754 74 1 0 1 33.800817 1
4 4755 89 0 0 0 20.716974 0
... ... ... ... ... ... ... ...
2144 6895 61 0 0 1 39.121757 0
2145 6896 75 0 0 2 17.857903 0
2146 6897 77 0 0 1 15.476479 0
2147 6898 78 1 3 1 15.299911 0
2148 6899 72 0 0 2 33.289738 0
AlcoholConsumption PhysicalActivity DietQuality ... \
0 13.297218 6.327112 1.347214 ...
1 4.542524 7.619885 0.518767 ...
2 19.555085 7.844988 1.826335 ...
3 12.209266 8.428001 7.435604 ...
4 18.454356 6.310461 0.795498 ...
... ... ... ... ...
2144 1.561126 4.049964 6.555306 ...
2145 18.767261 1.360667 2.904662 ...
2146 4.594670 9.886002 8.120025 ...
2147 8.674505 6.354282 1.263427 ...
2148 7.890703 6.570993 7.941404 ...
MemoryComplaints BehavioralProblems ADL Confusion \
0 0 0 1.725883 0
1 0 0 2.592424 0
2 0 0 7.119548 0
3 0 1 6.481226 0
4 0 0 0.014691 0
... ... ... ... ...
2144 0 0 4.492838 1
2145 0 1 9.204952 0
2146 0 0 5.036334 0
2147 0 0 3.785399 0
2148 0 1 8.327563 0
Disorientation PersonalityChanges DifficultyCompletingTasks \
0 0 0 1
1 0 0 0
2 1 0 1
3 0 0 0
4 0 1 1
... ... ... ...
2144 0 0 0
2145 0 0 0
2146 0 0 0
2147 0 0 0
2148 1 0 0
Forgetfulness Diagnosis DoctorInCharge
0 0 0 XXXConfid
1 1 0 XXXConfid
2 0 0 XXXConfid
3 0 0 XXXConfid
4 0 0 XXXConfid
... ... ... ...
2144 0 1 XXXConfid
2145 0 1 XXXConfid
2146 0 1 XXXConfid
2147 1 1 XXXConfid
2148 1 0 XXXConfid
[2149 rows x 35 columns]
%% Cell type:code id: tags:
``` python
# Restrict the dataset to the selected features plus the 'Diagnosis'
# target, save that subset (without the index column) and display it.
kept_columns = [*sf_list, "Diagnosis"]
df_filtered = main_df[kept_columns]
df_filtered.to_csv("filtered_selected_features.csv", index=False)
df_filtered
```
%% Output
BehavioralProblems Gender MemoryComplaints FunctionalAssessment \
0 0 0 0 6.518877
1 0 0 0 7.118696
2 0 0 0 5.895077
3 1 1 0 8.965106
4 0 0 0 6.045039
... ... ... ... ...
2144 0 0 0 0.238667
2145 1 0 0 8.687480
2146 0 0 0 1.972137
2147 0 1 0 5.173891
2148 1 0 0 6.307543
ADL MMSE Diagnosis
0 1.725883 21.463532 0
1 2.592424 20.613267 0
2 7.119548 7.356249 0
3 6.481226 13.991127 0
4 0.014691 13.517609 0
... ... ... ...
2144 4.492838 1.201190 1
2145 9.204952 6.458060 1
2146 5.036334 17.011003 1
2147 3.785399 4.030491 1
2148 8.327563 11.114777 0
[2149 rows x 7 columns]
%% Cell type:code id: tags:
``` python
# Pairwise correlations between the selected features and the target:
# printed as a table first, then drawn as an annotated heatmap.
correlation_matrix = df_filtered.corr()
print(correlation_matrix)

heatmap_options = {
    "annot": True,        # write each coefficient inside its cell
    "cmap": "coolwarm",
    "fmt": ".2f",         # two decimals per annotation
    "linewidths": 0.5,
}
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, **heatmap_options)
plt.title("Correlation Matrix")
plt.show()
```
%% Output
BehavioralProblems Gender MemoryComplaints \
BehavioralProblems 1.000000 0.006099 -0.009765
Gender 0.006099 1.000000 0.003880
MemoryComplaints -0.009765 0.003880 1.000000
FunctionalAssessment -0.021941 0.033324 0.002320
ADL 0.043376 0.003865 -0.037511
MMSE 0.025408 0.025330 0.007652
Diagnosis 0.224350 -0.020975 0.306742
FunctionalAssessment ADL MMSE Diagnosis
BehavioralProblems -0.021941 0.043376 0.025408 0.224350
Gender 0.033324 0.003865 0.025330 -0.020975
MemoryComplaints 0.002320 -0.037511 0.007652 0.306742
FunctionalAssessment 1.000000 0.053904 0.024932 -0.364898
ADL 0.053904 1.000000 0.003359 -0.332346
MMSE 0.024932 0.003359 1.000000 -0.237126
Diagnosis -0.364898 -0.332346 -0.237126 1.000000
%% Cell type:markdown id: tags:
### Data Pre-processing
%% Cell type:code id: tags:
``` python
# Separate the predictors from the 'Diagnosis' target.
y = df_filtered["Diagnosis"]
X = df_filtered.drop(columns="Diagnosis")

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize using statistics learned from the training rows only, then
# apply the same transformation to the held-out rows.
# NOTE(review): the scaled training array is later fed to a cross-validated
# search, so the scaler has already seen every validation fold. Consider
# wrapping the scaler and model in a Pipeline if that matters -- confirm.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
```
%% Cell type:markdown id: tags:
### Building the models
%% Cell type:code id: tags:
``` python
# Shared cross-validation splitter: 5 shuffled folds with a fixed seed so
# every model is evaluated on the same partitions.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Candidate classifiers, keyed by the name used to look up their search
# space in `param_grids`. The tree-based estimators are seeded as well:
# without it their internal randomness makes the tuning results differ
# from run to run even though the folds and the search are seeded.
models = {
    'DecisionTree': DecisionTreeClassifier(random_state=42),
    'RandomForest': RandomForestClassifier(random_state=42),
    'svc': SVC(),
}

# Search spaces for the randomized search. Lists are sampled uniformly;
# scipy `randint(low, high)` draws integers from [low, high).
param_grids = {
    'DecisionTree': {
        'criterion': ['gini', 'entropy'],
        'max_depth': [None, 10, 20, 30, 50],
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 5),
    },
    'RandomForest': {
        'n_estimators': randint(50, 200),
        'criterion': ['gini', 'entropy'],
        'max_depth': [None, 10, 20, 30, 50],
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 5),
    },
    'svc': {
        'C': [0.1, 1, 10, 100, 1000],
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
        'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1, 10],
    },
}
```
%% Cell type:code id: tags:
``` python
# Tune every candidate model with a randomized hyper-parameter search
# (100 sampled configurations, scored on the shared folds `kf`, using all
# available cores). Each fitted search is kept in `search_results` so the
# tuned estimators stay available after the loop instead of being
# overwritten by the next model's search.
search_results = {}
for name, model in models.items():
    print(f"Running RandomizedSearchCV for {name}...")
    random_search = RandomizedSearchCV(
        model,
        param_distributions=param_grids[name],
        cv=kf,
        n_iter=100,
        random_state=42,
        n_jobs=-1,
    )
    random_search.fit(X_train_scaled, y_train)
    search_results[name] = random_search
    print(f"Best parameters for {name}: {random_search.best_params_}\n")
```
%% Output
Running RandomizedSearchCV for DecisionTree...
Best parameters for DecisionTree: {'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 7}
Best parameters for DecisionTree: {'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 2}
Running RandomizedSearchCV for RandomForest...
Best parameters for RandomForest: {'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 7, 'n_estimators': 195}
Best parameters for RandomForest: {'criterion': 'gini', 'max_depth': 30, 'min_samples_leaf': 4, 'min_samples_split': 9, 'n_estimators': 112}
Running RandomizedSearchCV for svc...
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment