diff --git a/UFCFVQ-15-M Programming Task 2.ipynb b/UFCFVQ-15-M Programming Task 2.ipynb index cbb343ba7f709b4b0370562fda29d8a36cadb53a..1b10b946fb46d4e623fef6972abf9f671cd4e72c 100644 --- a/UFCFVQ-15-M Programming Task 2.ipynb +++ b/UFCFVQ-15-M Programming Task 2.ipynb @@ -632,63 +632,49 @@ "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "Unnamed: 0 0\n", - "id_student 0\n", - "gender 0\n", - "region 0\n", - "highest_education 0\n", - "age_band 0\n", - "disability 0\n", - "final_result 0\n", - "score 19\n", - "click_events 1371\n", - "dtype: int64" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Before Dropping\n", + "\n", + "Unnamed: 0 0\n", + "id_student 0\n", + "gender 0\n", + "region 0\n", + "highest_education 0\n", + "age_band 0\n", + "disability 0\n", + "final_result 0\n", + "score 19\n", + "click_events 1371\n", + "dtype: int64\n", + "\n", + "After Dropping\n", + "\n", + "Unnamed: 0 0\n", + "id_student 0\n", + "gender 0\n", + "region 0\n", + "highest_education 0\n", + "age_band 0\n", + "disability 0\n", + "final_result 0\n", + "score 0\n", + "click_events 0\n", + "dtype: int64\n", + "\n", + "The Dataset Shape is (25332, 10)\n" + ] } ], "source": [ "#Visiling how many null values are there before dropping it\n", - "Datasetframe.isnull().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Unnamed: 0 0\n", - "id_student 0\n", - "gender 0\n", - "region 0\n", - "highest_education 0\n", - "age_band 0\n", - "disability 0\n", - "final_result 0\n", - "score 0\n", - "click_events 0\n", - "dtype: int64" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ + "print(f'Before Dropping\\n\\n{Datasetframe.isnull().sum()}')\n", "# All rows with empty values will be dropped\n", "Datasetframe = Datasetframe.dropna()\n", - 
"\n", "#Visiling how many null values are there after dropping it\n", - "Datasetframe.isnull().sum()" + "print(f'\\nAfter Dropping\\n\\n{Datasetframe.isnull().sum()}')\n", + "print(f'\\nThe Dataset Shape is {Datasetframe.shape}')" ] }, { @@ -700,7 +686,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -882,7 +868,7 @@ " </tr>\n", " </tbody>\n", "</table>\n", - "<p>25259 rows × 10 columns</p>\n", + "<p>25254 rows × 10 columns</p>\n", "</div>" ], "text/plain": [ @@ -925,10 +911,10 @@ "26719 275.0 \n", "26720 616.0 \n", "\n", - "[25259 rows x 10 columns]" + "[25254 rows x 10 columns]" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -936,7 +922,7 @@ "source": [ "#All click_events under 10 will be removed from the Datasetframes\n", "\n", - "Datasetframe = Datasetframe.drop(Datasetframe[Datasetframe['click_events'] < 10].index)\n", + "Datasetframe = Datasetframe.drop(Datasetframe[Datasetframe['click_events'] <= 10].index)\n", "# Outputing the Dataset\n", "Datasetframe" ] @@ -950,11 +936,246 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Unnamed: 0</th>\n", + " <th>id_student</th>\n", + " <th>gender</th>\n", + " <th>region</th>\n", + " <th>highest_education</th>\n", + " <th>age_band</th>\n", + " <th>disability</th>\n", + " <th>final_result</th>\n", + " <th>final_mark</th>\n", + " <th>click_events</th>\n", + " </tr>\n", + " 
</thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>11391</td>\n", + " <td>M</td>\n", + " <td>East Anglian Region</td>\n", + " <td>HE Qualification</td>\n", + " <td>55<=</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>82.0</td>\n", + " <td>934.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>28400</td>\n", + " <td>F</td>\n", + " <td>Scotland</td>\n", + " <td>HE Qualification</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>67.0</td>\n", + " <td>1435.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>31604</td>\n", + " <td>F</td>\n", + " <td>South East Region</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>76.0</td>\n", + " <td>2158.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>32885</td>\n", + " <td>F</td>\n", + " <td>West Midlands Region</td>\n", + " <td>Lower Than A Level</td>\n", + " <td>0-35</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>55.0</td>\n", + " <td>1034.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>38053</td>\n", + " <td>M</td>\n", + " <td>Wales</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>68.0</td>\n", + " <td>2445.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26716</th>\n", + " <td>26741</td>\n", + " <td>2620947</td>\n", + " <td>F</td>\n", + " <td>Scotland</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>0-35</td>\n", + " <td>Y</td>\n", + " <td>Distinction</td>\n", + " <td>89.0</td>\n", + " <td>476.0</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>26717</th>\n", + " <td>26742</td>\n", + " <td>2645731</td>\n", + " <td>F</td>\n", + " <td>East Anglian Region</td>\n", + " <td>Lower Than A Level</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Distinction</td>\n", + " <td>89.0</td>\n", + " <td>893.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26718</th>\n", + " <td>26743</td>\n", + " <td>2648187</td>\n", + " <td>F</td>\n", + " <td>South Region</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>0-35</td>\n", + " <td>Y</td>\n", + " <td>Pass</td>\n", + " <td>77.0</td>\n", + " <td>312.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26719</th>\n", + " <td>26744</td>\n", + " <td>2679821</td>\n", + " <td>F</td>\n", + " <td>South East Region</td>\n", + " <td>Lower Than A Level</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Withdrawn</td>\n", + " <td>92.0</td>\n", + " <td>275.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26720</th>\n", + " <td>26745</td>\n", + " <td>2684003</td>\n", + " <td>F</td>\n", + " <td>Yorkshire Region</td>\n", + " <td>HE Qualification</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Distinction</td>\n", + " <td>83.0</td>\n", + " <td>616.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>25254 rows × 10 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Unnamed: 0 id_student gender region \\\n", + "0 0 11391 M East Anglian Region \n", + "1 1 28400 F Scotland \n", + "2 2 31604 F South East Region \n", + "3 3 32885 F West Midlands Region \n", + "4 4 38053 M Wales \n", + "... ... ... ... ... 
\n", + "26716 26741 2620947 F Scotland \n", + "26717 26742 2645731 F East Anglian Region \n", + "26718 26743 2648187 F South Region \n", + "26719 26744 2679821 F South East Region \n", + "26720 26745 2684003 F Yorkshire Region \n", + "\n", + " highest_education age_band disability final_result final_mark \\\n", + "0 HE Qualification 55<= N Pass 82.0 \n", + "1 HE Qualification 35-55 N Pass 67.0 \n", + "2 A Level or Equivalent 35-55 N Pass 76.0 \n", + "3 Lower Than A Level 0-35 N Pass 55.0 \n", + "4 A Level or Equivalent 35-55 N Pass 68.0 \n", + "... ... ... ... ... ... \n", + "26716 A Level or Equivalent 0-35 Y Distinction 89.0 \n", + "26717 Lower Than A Level 35-55 N Distinction 89.0 \n", + "26718 A Level or Equivalent 0-35 Y Pass 77.0 \n", + "26719 Lower Than A Level 35-55 N Withdrawn 92.0 \n", + "26720 HE Qualification 35-55 N Distinction 83.0 \n", + "\n", + " click_events \n", + "0 934.0 \n", + "1 1435.0 \n", + "2 2158.0 \n", + "3 1034.0 \n", + "4 2445.0 \n", + "... ... \n", + "26716 476.0 \n", + "26717 893.0 \n", + "26718 312.0 \n", + "26719 275.0 \n", + "26720 616.0 \n", + "\n", + "[25254 rows x 10 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# add code here" + "# Using the pandas rename function to rename the 'score' column in Datasetframe to 'final_mark'\n", + "\n", + "Datasetframe = Datasetframe.rename(columns={'score': 'final_mark'})\n", + "\n", + "# Outputting the result\n", + "Datasetframe" ] }, { @@ -966,11 +1187,199 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" 
class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Unnamed: 0</th>\n", + " <th>id_student</th>\n", + " <th>gender</th>\n", + " <th>age_band</th>\n", + " <th>disability</th>\n", + " <th>final_mark</th>\n", + " <th>click_events</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>11391</td>\n", + " <td>M</td>\n", + " <td>55<=</td>\n", + " <td>N</td>\n", + " <td>82.0</td>\n", + " <td>934.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>28400</td>\n", + " <td>F</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>67.0</td>\n", + " <td>1435.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>31604</td>\n", + " <td>F</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>76.0</td>\n", + " <td>2158.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>32885</td>\n", + " <td>F</td>\n", + " <td>0-35</td>\n", + " <td>N</td>\n", + " <td>55.0</td>\n", + " <td>1034.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>38053</td>\n", + " <td>M</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>68.0</td>\n", + " <td>2445.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26716</th>\n", + " <td>26741</td>\n", + " <td>2620947</td>\n", + " <td>F</td>\n", + " <td>0-35</td>\n", + " <td>Y</td>\n", + " <td>89.0</td>\n", + " <td>476.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26717</th>\n", + " <td>26742</td>\n", + " <td>2645731</td>\n", + " <td>F</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>89.0</td>\n", + " <td>893.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26718</th>\n", + " <td>26743</td>\n", + " <td>2648187</td>\n", + 
" <td>F</td>\n", + " <td>0-35</td>\n", + " <td>Y</td>\n", + " <td>77.0</td>\n", + " <td>312.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26719</th>\n", + " <td>26744</td>\n", + " <td>2679821</td>\n", + " <td>F</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>92.0</td>\n", + " <td>275.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26720</th>\n", + " <td>26745</td>\n", + " <td>2684003</td>\n", + " <td>F</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>83.0</td>\n", + " <td>616.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>25254 rows × 7 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Unnamed: 0 id_student gender age_band disability final_mark \\\n", + "0 0 11391 M 55<= N 82.0 \n", + "1 1 28400 F 35-55 N 67.0 \n", + "2 2 31604 F 35-55 N 76.0 \n", + "3 3 32885 F 0-35 N 55.0 \n", + "4 4 38053 M 35-55 N 68.0 \n", + "... ... ... ... ... ... ... \n", + "26716 26741 2620947 F 0-35 Y 89.0 \n", + "26717 26742 2645731 F 35-55 N 89.0 \n", + "26718 26743 2648187 F 0-35 Y 77.0 \n", + "26719 26744 2679821 F 35-55 N 92.0 \n", + "26720 26745 2684003 F 35-55 N 83.0 \n", + "\n", + " click_events \n", + "0 934.0 \n", + "1 1435.0 \n", + "2 2158.0 \n", + "3 1034.0 \n", + "4 2445.0 \n", + "... ... 
\n", + "26716 476.0 \n", + "26717 893.0 \n", + "26718 312.0 \n", + "26719 275.0 \n", + "26720 616.0 \n", + "\n", + "[25254 rows x 7 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# add code here" + "# Using the pandas drop function to drop the columns ['region', 'final_result', 'highest_education']\n", + "# By default the axis is 0 (rows), therefore it has been set here to 1\n", + "# axis=1 makes drop look for the given labels among the COLUMNS instead of the row index\n", + "\n", + "Datasetframe = Datasetframe.drop(['region', 'final_result', 'highest_education'], axis=1)\n", + "\n", + "# Printing the output without the dropped columns\n", + "Datasetframe" ] }, { @@ -982,7 +1391,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -998,7 +1407,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -1014,7 +1423,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -1030,7 +1439,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -1046,7 +1455,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [