From 76c34f7fd4d2ee77fe740c8cb9cccabe4189ec1f Mon Sep 17 00:00:00 2001 From: "Shekwoyeyilo2.gado@live.uwe.ac.uk" <sarah.y.gado@gmail.com> Date: Thu, 13 Mar 2025 15:08:22 +0000 Subject: [PATCH] parameters for all features --- models copy.ipynb | 960 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 960 insertions(+) create mode 100644 models copy.ipynb diff --git a/models copy.ipynb b/models copy.ipynb new file mode 100644 index 0000000..8203ec4 --- /dev/null +++ b/models copy.ipynb @@ -0,0 +1,960 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import ast\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.model_selection import RandomizedSearchCV\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.model_selection import cross_val_score, KFold\n", + "from sklearn.svm import SVC\n", + "from scipy.stats import randint" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>PatientID</th>\n", + " <th>Age</th>\n", + " <th>Gender</th>\n", + " <th>Ethnicity</th>\n", + " <th>EducationLevel</th>\n", + " <th>BMI</th>\n", + " <th>Smoking</th>\n", + " <th>AlcoholConsumption</th>\n", + " <th>PhysicalActivity</th>\n", + " <th>DietQuality</th>\n", + " <th>...</th>\n", + " <th>MemoryComplaints</th>\n", + " <th>BehavioralProblems</th>\n", + " <th>ADL</th>\n", + " <th>Confusion</th>\n", + " <th>Disorientation</th>\n", + " <th>PersonalityChanges</th>\n", + " <th>DifficultyCompletingTasks</th>\n", + " <th>Forgetfulness</th>\n", + " <th>Diagnosis</th>\n", + " <th>DoctorInCharge</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>4751</td>\n", + " <td>73</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>22.927749</td>\n", + " <td>0</td>\n", + " <td>13.297218</td>\n", + " <td>6.327112</td>\n", + " <td>1.347214</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1.725883</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>XXXConfid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>4752</td>\n", + " <td>89</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>26.827681</td>\n", + " <td>0</td>\n", + " <td>4.542524</td>\n", + " <td>7.619885</td>\n", + " <td>0.518767</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2.592424</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>XXXConfid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>4753</td>\n", + " <td>73</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>17.795882</td>\n", + " <td>0</td>\n", + " <td>19.555085</td>\n", + " <td>7.844988</td>\n", + " <td>1.826335</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>7.119548</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>XXXConfid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4754</td>\n", + " <td>74</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>33.800817</td>\n", + " <td>1</td>\n", + " <td>12.209266</td>\n", + " <td>8.428001</td>\n", + " <td>7.435604</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>6.481226</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>XXXConfid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4755</td>\n", + " <td>89</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>20.716974</td>\n", + " <td>0</td>\n", + " <td>18.454356</td>\n", + " <td>6.310461</td>\n", + " <td>0.795498</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0.014691</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>XXXConfid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2144</th>\n", + " <td>6895</td>\n", + " <td>61</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>39.121757</td>\n", + " <td>0</td>\n", + " <td>1.561126</td>\n", + " <td>4.049964</td>\n", + " <td>6.555306</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4.492838</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>XXXConfid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2145</th>\n", + " <td>6896</td>\n", + " <td>75</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>17.857903</td>\n", + " <td>0</td>\n", + " <td>18.767261</td>\n", + " <td>1.360667</td>\n", + " <td>2.904662</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>9.204952</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>XXXConfid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2146</th>\n", + " <td>6897</td>\n", + " <td>77</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>15.476479</td>\n", + " <td>0</td>\n", + " <td>4.594670</td>\n", + " <td>9.886002</td>\n", + " <td>8.120025</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>5.036334</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>XXXConfid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2147</th>\n", + " <td>6898</td>\n", + " <td>78</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>15.299911</td>\n", + " <td>0</td>\n", + " <td>8.674505</td>\n", + " <td>6.354282</td>\n", + " <td>1.263427</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>3.785399</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>XXXConfid</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2148</th>\n", + " <td>6899</td>\n", + " <td>72</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>33.289738</td>\n", + " <td>0</td>\n", + " <td>7.890703</td>\n", + " <td>6.570993</td>\n", + " <td>7.941404</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>8.327563</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>XXXConfid</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>2149 rows × 35 columns</p>\n", + "</div>" + ], + "text/plain": [ + " PatientID Age Gender Ethnicity EducationLevel BMI Smoking \\\n", + "0 4751 73 0 0 2 22.927749 0 \n", + "1 4752 89 0 0 0 26.827681 0 \n", + "2 4753 73 0 3 1 17.795882 0 \n", + "3 4754 74 1 0 1 33.800817 1 \n", + "4 4755 89 0 0 0 20.716974 0 \n", + "... ... ... ... ... ... ... ... \n", + "2144 6895 61 0 0 1 39.121757 0 \n", + "2145 6896 75 0 0 2 17.857903 0 \n", + "2146 6897 77 0 0 1 15.476479 0 \n", + "2147 6898 78 1 3 1 15.299911 0 \n", + "2148 6899 72 0 0 2 33.289738 0 \n", + "\n", + " AlcoholConsumption PhysicalActivity DietQuality ... \\\n", + "0 13.297218 6.327112 1.347214 ... \n", + "1 4.542524 7.619885 0.518767 ... \n", + "2 19.555085 7.844988 1.826335 ... \n", + "3 12.209266 8.428001 7.435604 ... \n", + "4 18.454356 6.310461 0.795498 ... \n", + "... ... ... ... ... \n", + "2144 1.561126 4.049964 6.555306 ... \n", + "2145 18.767261 1.360667 2.904662 ... \n", + "2146 4.594670 9.886002 8.120025 ... \n", + "2147 8.674505 6.354282 1.263427 ... \n", + "2148 7.890703 6.570993 7.941404 ... \n", + "\n", + " MemoryComplaints BehavioralProblems ADL Confusion \\\n", + "0 0 0 1.725883 0 \n", + "1 0 0 2.592424 0 \n", + "2 0 0 7.119548 0 \n", + "3 0 1 6.481226 0 \n", + "4 0 0 0.014691 0 \n", + "... ... ... ... ... \n", + "2144 0 0 4.492838 1 \n", + "2145 0 1 9.204952 0 \n", + "2146 0 0 5.036334 0 \n", + "2147 0 0 3.785399 0 \n", + "2148 0 1 8.327563 0 \n", + "\n", + " Disorientation PersonalityChanges DifficultyCompletingTasks \\\n", + "0 0 0 1 \n", + "1 0 0 0 \n", + "2 1 0 1 \n", + "3 0 0 0 \n", + "4 0 1 1 \n", + "... ... ... ... \n", + "2144 0 0 0 \n", + "2145 0 0 0 \n", + "2146 0 0 0 \n", + "2147 0 0 0 \n", + "2148 1 0 0 \n", + "\n", + " Forgetfulness Diagnosis DoctorInCharge \n", + "0 0 0 XXXConfid \n", + "1 1 0 XXXConfid \n", + "2 0 0 XXXConfid \n", + "3 0 0 XXXConfid \n", + "4 0 0 XXXConfid \n", + "... ... ... ... \n", + "2144 0 1 XXXConfid \n", + "2145 0 1 XXXConfid \n", + "2146 0 1 XXXConfid \n", + "2147 1 1 XXXConfid \n", + "2148 1 0 XXXConfid \n", + "\n", + "[2149 rows x 35 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('alzheimers_disease_data.csv')\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Age</th>\n", + " <th>Gender</th>\n", + " <th>Ethnicity</th>\n", + " <th>EducationLevel</th>\n", + " <th>BMI</th>\n", + " <th>Smoking</th>\n", + " <th>AlcoholConsumption</th>\n", + " <th>PhysicalActivity</th>\n", + " <th>DietQuality</th>\n", + " <th>SleepQuality</th>\n", + " <th>...</th>\n", + " <th>FunctionalAssessment</th>\n", + " <th>MemoryComplaints</th>\n", + " <th>BehavioralProblems</th>\n", + " <th>ADL</th>\n", + " <th>Confusion</th>\n", + " <th>Disorientation</th>\n", + " <th>PersonalityChanges</th>\n", + " <th>DifficultyCompletingTasks</th>\n", + " <th>Forgetfulness</th>\n", + " <th>Diagnosis</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>73</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>22.927749</td>\n", + " <td>0</td>\n", + " <td>13.297218</td>\n", + " <td>6.327112</td>\n", + " <td>1.347214</td>\n", + " <td>9.025679</td>\n", + " <td>...</td>\n", + " <td>6.518877</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1.725883</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>89</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>26.827681</td>\n", + " <td>0</td>\n", + " <td>4.542524</td>\n", + " <td>7.619885</td>\n", + " <td>0.518767</td>\n", + " <td>7.151293</td>\n", + " <td>...</td>\n", + " <td>7.118696</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2.592424</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>73</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>17.795882</td>\n", + " <td>0</td>\n", + " <td>19.555085</td>\n", + " <td>7.844988</td>\n", + " <td>1.826335</td>\n", + " <td>9.673574</td>\n", + " <td>...</td>\n", + " <td>5.895077</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>7.119548</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>74</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>33.800817</td>\n", + " <td>1</td>\n", + " <td>12.209266</td>\n", + " <td>8.428001</td>\n", + " <td>7.435604</td>\n", + " <td>8.392554</td>\n", + " <td>...</td>\n", + " <td>8.965106</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>6.481226</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>89</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>20.716974</td>\n", + " <td>0</td>\n", + " <td>18.454356</td>\n", + " <td>6.310461</td>\n", + " <td>0.795498</td>\n", + " <td>5.597238</td>\n", + " <td>...</td>\n", + " <td>6.045039</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0.014691</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2144</th>\n", + " <td>61</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>39.121757</td>\n", + " <td>0</td>\n", + " <td>1.561126</td>\n", + " <td>4.049964</td>\n", + " <td>6.555306</td>\n", + " <td>7.535540</td>\n", + " <td>...</td>\n", + " <td>0.238667</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>4.492838</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2145</th>\n", + " <td>75</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>17.857903</td>\n", + " <td>0</td>\n", + " <td>18.767261</td>\n", + " <td>1.360667</td>\n", + " <td>2.904662</td>\n", + " <td>8.555256</td>\n", + " <td>...</td>\n", + " <td>8.687480</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>9.204952</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2146</th>\n", + " <td>77</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>15.476479</td>\n", + " <td>0</td>\n", + " <td>4.594670</td>\n", + " <td>9.886002</td>\n", + " <td>8.120025</td>\n", + " <td>5.769464</td>\n", + " <td>...</td>\n", + " <td>1.972137</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>5.036334</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2147</th>\n", + " <td>78</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " <td>15.299911</td>\n", + " <td>0</td>\n", + " <td>8.674505</td>\n", + " <td>6.354282</td>\n", + " <td>1.263427</td>\n", + " <td>8.322874</td>\n", + " <td>...</td>\n", + " <td>5.173891</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>3.785399</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2148</th>\n", + " <td>72</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>33.289738</td>\n", + " <td>0</td>\n", + " <td>7.890703</td>\n", + " <td>6.570993</td>\n", + " <td>7.941404</td>\n", + " <td>9.878711</td>\n", + " <td>...</td>\n", + " <td>6.307543</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>8.327563</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>2149 rows × 33 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Age Gender Ethnicity EducationLevel BMI Smoking \\\n", + "0 73 0 0 2 22.927749 0 \n", + "1 89 0 0 0 26.827681 0 \n", + "2 73 0 3 1 17.795882 0 \n", + "3 74 1 0 1 33.800817 1 \n", + "4 89 0 0 0 20.716974 0 \n", + "... ... ... ... ... ... ... \n", + "2144 61 0 0 1 39.121757 0 \n", + "2145 75 0 0 2 17.857903 0 \n", + "2146 77 0 0 1 15.476479 0 \n", + "2147 78 1 3 1 15.299911 0 \n", + "2148 72 0 0 2 33.289738 0 \n", + "\n", + " AlcoholConsumption PhysicalActivity DietQuality SleepQuality ... \\\n", + "0 13.297218 6.327112 1.347214 9.025679 ... \n", + "1 4.542524 7.619885 0.518767 7.151293 ... \n", + "2 19.555085 7.844988 1.826335 9.673574 ... \n", + "3 12.209266 8.428001 7.435604 8.392554 ... \n", + "4 18.454356 6.310461 0.795498 5.597238 ... \n", + "... ... ... ... ... ... \n", + "2144 1.561126 4.049964 6.555306 7.535540 ... \n", + "2145 18.767261 1.360667 2.904662 8.555256 ... \n", + "2146 4.594670 9.886002 8.120025 5.769464 ... \n", + "2147 8.674505 6.354282 1.263427 8.322874 ... \n", + "2148 7.890703 6.570993 7.941404 9.878711 ... \n", + "\n", + " FunctionalAssessment MemoryComplaints BehavioralProblems ADL \\\n", + "0 6.518877 0 0 1.725883 \n", + "1 7.118696 0 0 2.592424 \n", + "2 5.895077 0 0 7.119548 \n", + "3 8.965106 0 1 6.481226 \n", + "4 6.045039 0 0 0.014691 \n", + "... ... ... ... ... \n", + "2144 0.238667 0 0 4.492838 \n", + "2145 8.687480 0 1 9.204952 \n", + "2146 1.972137 0 0 5.036334 \n", + "2147 5.173891 0 0 3.785399 \n", + "2148 6.307543 0 1 8.327563 \n", + "\n", + " Confusion Disorientation PersonalityChanges \\\n", + "0 0 0 0 \n", + "1 0 0 0 \n", + "2 0 1 0 \n", + "3 0 0 0 \n", + "4 0 0 1 \n", + "... ... ... ... \n", + "2144 1 0 0 \n", + "2145 0 0 0 \n", + "2146 0 0 0 \n", + "2147 0 0 0 \n", + "2148 0 1 0 \n", + "\n", + " DifficultyCompletingTasks Forgetfulness Diagnosis \n", + "0 1 0 0 \n", + "1 0 1 0 \n", + "2 1 0 0 \n", + "3 0 0 0 \n", + "4 1 0 0 \n", + "... ... ... ... \n", + "2144 0 0 1 \n", + "2145 0 0 1 \n", + "2146 0 0 1 \n", + "2147 0 1 1 \n", + "2148 0 1 0 \n", + "\n", + "[2149 rows x 33 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a_df =df.drop(['DoctorInCharge', 'PatientID'], axis=1, inplace=True)\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data Pre-processing" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "X = df.drop('Diagnosis', axis= 1)\n", + "y = df['Diagnosis']\n", + "\n", + "#split the data into test and train\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "#apply scaler\n", + "scaler = StandardScaler()\n", + "X_train_scaled = scaler.fit_transform(X_train)\n", + "X_test_scaled = scaler.transform(X_test)\n", + "\n", + "#X_train" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Building the models" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "kf = KFold(n_splits= 5, shuffle= True, random_state= 42)\n", + "\n", + "models = {\n", + " 'DecisionTree': DecisionTreeClassifier(), 'RandomForest': RandomForestClassifier(), 'svc': SVC()\n", + "}\n", + "\n", + "# Define parameters to test using the randomized grid search\n", + "param_grids = {\n", + " 'DecisionTree': {\n", + " 'criterion': ['gini', 'entropy'],\n", + " 'max_depth': [None, 10, 20, 30, 50],\n", + " 'min_samples_split': randint(2, 10),\n", + " 'min_samples_leaf': randint(1, 5)\n", + " },\n", + " \n", + " 'RandomForest': {\n", + " 'n_estimators': randint(50, 200),\n", + " 'criterion': ['gini', 'entropy'],\n", + " 'max_depth': [None, 10, 20, 30, 50],\n", + " 'min_samples_split': randint(2, 10),\n", + " 'min_samples_leaf': randint(1, 5)\n", + " },\n", + " \n", + " 'svc': {\n", + " 'C': [0.1, 1, 10, 100, 1000],\n", + " 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],\n", + " 'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1, 10]\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running RandomizedSearchCV for DecisionTree...\n", + "Best parameters for DecisionTree: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 5}\n", + "\n", + "Running RandomizedSearchCV for RandomForest...\n", + "Best parameters for RandomForest: {'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 173}\n", + "\n", + "Running RandomizedSearchCV for svc...\n", + "Best parameters for svc: {'kernel': 'rbf', 'gamma': 0.001, 'C': 100}\n", + "\n" + ] + } + ], + "source": [ + "for name, model in models.items():\n", + " #print(name)\n", + " print(f\"Running RandomizedSearchCV for {name}...\")\n", + " random_search = RandomizedSearchCV(model, param_distributions=param_grids[name], cv =kf, n_iter =100, random_state=42, n_jobs=-1)\n", + " random_search.fit(X_train_scaled, y_train)\n", + " print(f\"Best parameters for {name}: {random_search.best_params_}\\n\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Running RandomizedSearchCV for DecisionTree...\n", + "Best parameters for DecisionTree: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 5}\n", + "\n", + "Running RandomizedSearchCV for RandomForest...\n", + "Best parameters for RandomForest: {'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 173}\n", + "\n", + "Running RandomizedSearchCV for svc...\n", + "Best parameters for svc: {'kernel': 'rbf', 'gamma': 0.001, 'C': 100}\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} -- GitLab