Task 2 Updated Version

0384f6ad · wa2-alaaiddin · 7b1d5e3f · 0384f6ad
Commit 0384f6ad authored 2 years ago by wa2-alaaiddin
--- a/UFCFVQ-15-M Programming Task 2.ipynb
+++ b/UFCFVQ-15-M Programming Task 2.ipynb
@@ -7,7 +7,7 @@
    "# UFCFVQ-15-M Programming for Data Science\n",
    "# Programming Task 2\n",
    "\n",
-    "## Student Id: "
+    "## Student Id: 23003188"
   ]
  },
  {
@@ -19,11 +19,223 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
+       "      <th>id_student</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>region</th>\n",
+       "      <th>highest_education</th>\n",
+       "      <th>age_band</th>\n",
+       "      <th>disability</th>\n",
+       "      <th>final_result</th>\n",
+       "      <th>score</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>11391</td>\n",
+       "      <td>M</td>\n",
+       "      <td>East Anglian Region</td>\n",
+       "      <td>HE Qualification</td>\n",
+       "      <td>55&lt;=</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>82.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>28400</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Scotland</td>\n",
+       "      <td>HE Qualification</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>67.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>31604</td>\n",
+       "      <td>F</td>\n",
+       "      <td>South East Region</td>\n",
+       "      <td>A Level or Equivalent</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>76.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>32885</td>\n",
+       "      <td>F</td>\n",
+       "      <td>West Midlands Region</td>\n",
+       "      <td>Lower Than A Level</td>\n",
+       "      <td>0-35</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>55.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>38053</td>\n",
+       "      <td>M</td>\n",
+       "      <td>Wales</td>\n",
+       "      <td>A Level or Equivalent</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>68.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26741</th>\n",
+       "      <td>26741</td>\n",
+       "      <td>2620947</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Scotland</td>\n",
+       "      <td>A Level or Equivalent</td>\n",
+       "      <td>0-35</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>Distinction</td>\n",
+       "      <td>89.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26742</th>\n",
+       "      <td>26742</td>\n",
+       "      <td>2645731</td>\n",
+       "      <td>F</td>\n",
+       "      <td>East Anglian Region</td>\n",
+       "      <td>Lower Than A Level</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Distinction</td>\n",
+       "      <td>89.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26743</th>\n",
+       "      <td>26743</td>\n",
+       "      <td>2648187</td>\n",
+       "      <td>F</td>\n",
+       "      <td>South Region</td>\n",
+       "      <td>A Level or Equivalent</td>\n",
+       "      <td>0-35</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>77.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26744</th>\n",
+       "      <td>26744</td>\n",
+       "      <td>2679821</td>\n",
+       "      <td>F</td>\n",
+       "      <td>South East Region</td>\n",
+       "      <td>Lower Than A Level</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Withdrawn</td>\n",
+       "      <td>92.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26745</th>\n",
+       "      <td>26745</td>\n",
+       "      <td>2684003</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Yorkshire Region</td>\n",
+       "      <td>HE Qualification</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Distinction</td>\n",
+       "      <td>83.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>26746 rows × 9 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Unnamed: 0  id_student gender                region  \\\n",
+       "0               0       11391      M   East Anglian Region   \n",
+       "1               1       28400      F              Scotland   \n",
+       "2               2       31604      F     South East Region   \n",
+       "3               3       32885      F  West Midlands Region   \n",
+       "4               4       38053      M                 Wales   \n",
+       "...           ...         ...    ...                   ...   \n",
+       "26741       26741     2620947      F              Scotland   \n",
+       "26742       26742     2645731      F   East Anglian Region   \n",
+       "26743       26743     2648187      F          South Region   \n",
+       "26744       26744     2679821      F     South East Region   \n",
+       "26745       26745     2684003      F      Yorkshire Region   \n",
+       "\n",
+       "           highest_education age_band disability final_result  score  \n",
+       "0           HE Qualification     55<=          N         Pass   82.0  \n",
+       "1           HE Qualification    35-55          N         Pass   67.0  \n",
+       "2      A Level or Equivalent    35-55          N         Pass   76.0  \n",
+       "3         Lower Than A Level     0-35          N         Pass   55.0  \n",
+       "4      A Level or Equivalent    35-55          N         Pass   68.0  \n",
+       "...                      ...      ...        ...          ...    ...  \n",
+       "26741  A Level or Equivalent     0-35          Y  Distinction   89.0  \n",
+       "26742     Lower Than A Level    35-55          N  Distinction   89.0  \n",
+       "26743  A Level or Equivalent     0-35          Y         Pass   77.0  \n",
+       "26744     Lower Than A Level    35-55          N    Withdrawn   92.0  \n",
+       "26745       HE Qualification    35-55          N  Distinction   83.0  \n",
+       "\n",
+       "[26746 rows x 9 columns]"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
-    "# add code here"
+    "# importing pandas library\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Reading The First Dataset Using pandas\n",
+    "Dataset_A = pd.read_csv('task2a.csv')\n",
+    "\n",
+    "# Outputting the Dataset\n",
+    "Dataset_A"
   ]
  },
  {
@@ -35,11 +247,125 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id_student</th>\n",
+       "      <th>click_events</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>6516</td>\n",
+       "      <td>2791.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>8462</td>\n",
+       "      <td>656.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>11391</td>\n",
+       "      <td>934.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>23629</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>23698</td>\n",
+       "      <td>910.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26069</th>\n",
+       "      <td>2698251</td>\n",
+       "      <td>1511.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26070</th>\n",
+       "      <td>2698257</td>\n",
+       "      <td>758.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26071</th>\n",
+       "      <td>2698535</td>\n",
+       "      <td>4241.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26072</th>\n",
+       "      <td>2698577</td>\n",
+       "      <td>717.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26073</th>\n",
+       "      <td>2698588</td>\n",
+       "      <td>605.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>26074 rows × 2 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       id_student  click_events\n",
+       "0            6516        2791.0\n",
+       "1            8462         656.0\n",
+       "2           11391         934.0\n",
+       "3           23629           NaN\n",
+       "4           23698         910.0\n",
+       "...           ...           ...\n",
+       "26069     2698251        1511.0\n",
+       "26070     2698257         758.0\n",
+       "26071     2698535        4241.0\n",
+       "26072     2698577         717.0\n",
+       "26073     2698588         605.0\n",
+       "\n",
+       "[26074 rows x 2 columns]"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
-    "# add code here"
+    "# Reading The Second Dataset Using pandas\n",
+    "# Supplying the column names ['id_student', 'click_events'] via the `names` argument\n",
+    "\n",
+    "Dataset_B = pd.read_csv('task2b.csv', names=['id_student', 'click_events'])\n",
+    "\n",
+    "# Outputting the Dataset\n",
+    "Dataset_B"
   ]
  },
  {
@@ -51,11 +377,246 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
+       "      <th>id_student</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>region</th>\n",
+       "      <th>highest_education</th>\n",
+       "      <th>age_band</th>\n",
+       "      <th>disability</th>\n",
+       "      <th>final_result</th>\n",
+       "      <th>score</th>\n",
+       "      <th>click_events</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>11391</td>\n",
+       "      <td>M</td>\n",
+       "      <td>East Anglian Region</td>\n",
+       "      <td>HE Qualification</td>\n",
+       "      <td>55&lt;=</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>82.0</td>\n",
+       "      <td>934.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>28400</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Scotland</td>\n",
+       "      <td>HE Qualification</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>67.0</td>\n",
+       "      <td>1435.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>31604</td>\n",
+       "      <td>F</td>\n",
+       "      <td>South East Region</td>\n",
+       "      <td>A Level or Equivalent</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>76.0</td>\n",
+       "      <td>2158.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>32885</td>\n",
+       "      <td>F</td>\n",
+       "      <td>West Midlands Region</td>\n",
+       "      <td>Lower Than A Level</td>\n",
+       "      <td>0-35</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>55.0</td>\n",
+       "      <td>1034.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>38053</td>\n",
+       "      <td>M</td>\n",
+       "      <td>Wales</td>\n",
+       "      <td>A Level or Equivalent</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>2445.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26716</th>\n",
+       "      <td>26741</td>\n",
+       "      <td>2620947</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Scotland</td>\n",
+       "      <td>A Level or Equivalent</td>\n",
+       "      <td>0-35</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>Distinction</td>\n",
+       "      <td>89.0</td>\n",
+       "      <td>476.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26717</th>\n",
+       "      <td>26742</td>\n",
+       "      <td>2645731</td>\n",
+       "      <td>F</td>\n",
+       "      <td>East Anglian Region</td>\n",
+       "      <td>Lower Than A Level</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Distinction</td>\n",
+       "      <td>89.0</td>\n",
+       "      <td>893.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26718</th>\n",
+       "      <td>26743</td>\n",
+       "      <td>2648187</td>\n",
+       "      <td>F</td>\n",
+       "      <td>South Region</td>\n",
+       "      <td>A Level or Equivalent</td>\n",
+       "      <td>0-35</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>77.0</td>\n",
+       "      <td>312.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26719</th>\n",
+       "      <td>26744</td>\n",
+       "      <td>2679821</td>\n",
+       "      <td>F</td>\n",
+       "      <td>South East Region</td>\n",
+       "      <td>Lower Than A Level</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Withdrawn</td>\n",
+       "      <td>92.0</td>\n",
+       "      <td>275.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26720</th>\n",
+       "      <td>26745</td>\n",
+       "      <td>2684003</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Yorkshire Region</td>\n",
+       "      <td>HE Qualification</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Distinction</td>\n",
+       "      <td>83.0</td>\n",
+       "      <td>616.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>26721 rows × 10 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Unnamed: 0  id_student gender                region  \\\n",
+       "0               0       11391      M   East Anglian Region   \n",
+       "1               1       28400      F              Scotland   \n",
+       "2               2       31604      F     South East Region   \n",
+       "3               3       32885      F  West Midlands Region   \n",
+       "4               4       38053      M                 Wales   \n",
+       "...           ...         ...    ...                   ...   \n",
+       "26716       26741     2620947      F              Scotland   \n",
+       "26717       26742     2645731      F   East Anglian Region   \n",
+       "26718       26743     2648187      F          South Region   \n",
+       "26719       26744     2679821      F     South East Region   \n",
+       "26720       26745     2684003      F      Yorkshire Region   \n",
+       "\n",
+       "           highest_education age_band disability final_result  score  \\\n",
+       "0           HE Qualification     55<=          N         Pass   82.0   \n",
+       "1           HE Qualification    35-55          N         Pass   67.0   \n",
+       "2      A Level or Equivalent    35-55          N         Pass   76.0   \n",
+       "3         Lower Than A Level     0-35          N         Pass   55.0   \n",
+       "4      A Level or Equivalent    35-55          N         Pass   68.0   \n",
+       "...                      ...      ...        ...          ...    ...   \n",
+       "26716  A Level or Equivalent     0-35          Y  Distinction   89.0   \n",
+       "26717     Lower Than A Level    35-55          N  Distinction   89.0   \n",
+       "26718  A Level or Equivalent     0-35          Y         Pass   77.0   \n",
+       "26719     Lower Than A Level    35-55          N    Withdrawn   92.0   \n",
+       "26720       HE Qualification    35-55          N  Distinction   83.0   \n",
+       "\n",
+       "       click_events  \n",
+       "0             934.0  \n",
+       "1            1435.0  \n",
+       "2            2158.0  \n",
+       "3            1034.0  \n",
+       "4            2445.0  \n",
+       "...             ...  \n",
+       "26716         476.0  \n",
+       "26717         893.0  \n",
+       "26718         312.0  \n",
+       "26719         275.0  \n",
+       "26720         616.0  \n",
+       "\n",
+       "[26721 rows x 10 columns]"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
-    "# add code here"
+    "# merging the two Datasets into one DataFrame \n",
+    "# Since both have the same column, the merge will be based on the common column 'id_student'\n",
+    "Datasetframe = pd.merge(Dataset_A, Dataset_B, on=['id_student'])\n",
+    "\n",
+    "# Outputting the Dataset\n",
+    "Datasetframe"
   ]
  },
  {
@@ -67,11 +628,67 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Unnamed: 0              0\n",
+       "id_student              0\n",
+       "gender                  0\n",
+       "region                  0\n",
+       "highest_education       0\n",
+       "age_band                0\n",
+       "disability              0\n",
+       "final_result            0\n",
+       "score                  19\n",
+       "click_events         1371\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
-    "# add code here"
+    "# Viewing how many null values there are before dropping them\n",
+    "Datasetframe.isnull().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Unnamed: 0           0\n",
+       "id_student           0\n",
+       "gender               0\n",
+       "region               0\n",
+       "highest_education    0\n",
+       "age_band             0\n",
+       "disability           0\n",
+       "final_result         0\n",
+       "score                0\n",
+       "click_events         0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# All rows with empty values will be dropped\n",
+    "Datasetframe = Datasetframe.dropna()\n",
+    "\n",
+    "# Viewing how many null values there are after dropping them\n",
+    "Datasetframe.isnull().sum()"
   ]
  },
  {
@@ -83,11 +700,245 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
+       "      <th>id_student</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>region</th>\n",
+       "      <th>highest_education</th>\n",
+       "      <th>age_band</th>\n",
+       "      <th>disability</th>\n",
+       "      <th>final_result</th>\n",
+       "      <th>score</th>\n",
+       "      <th>click_events</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>11391</td>\n",
+       "      <td>M</td>\n",
+       "      <td>East Anglian Region</td>\n",
+       "      <td>HE Qualification</td>\n",
+       "      <td>55&lt;=</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>82.0</td>\n",
+       "      <td>934.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>28400</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Scotland</td>\n",
+       "      <td>HE Qualification</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>67.0</td>\n",
+       "      <td>1435.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>31604</td>\n",
+       "      <td>F</td>\n",
+       "      <td>South East Region</td>\n",
+       "      <td>A Level or Equivalent</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>76.0</td>\n",
+       "      <td>2158.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>32885</td>\n",
+       "      <td>F</td>\n",
+       "      <td>West Midlands Region</td>\n",
+       "      <td>Lower Than A Level</td>\n",
+       "      <td>0-35</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>55.0</td>\n",
+       "      <td>1034.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>38053</td>\n",
+       "      <td>M</td>\n",
+       "      <td>Wales</td>\n",
+       "      <td>A Level or Equivalent</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>2445.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26716</th>\n",
+       "      <td>26741</td>\n",
+       "      <td>2620947</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Scotland</td>\n",
+       "      <td>A Level or Equivalent</td>\n",
+       "      <td>0-35</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>Distinction</td>\n",
+       "      <td>89.0</td>\n",
+       "      <td>476.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26717</th>\n",
+       "      <td>26742</td>\n",
+       "      <td>2645731</td>\n",
+       "      <td>F</td>\n",
+       "      <td>East Anglian Region</td>\n",
+       "      <td>Lower Than A Level</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Distinction</td>\n",
+       "      <td>89.0</td>\n",
+       "      <td>893.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26718</th>\n",
+       "      <td>26743</td>\n",
+       "      <td>2648187</td>\n",
+       "      <td>F</td>\n",
+       "      <td>South Region</td>\n",
+       "      <td>A Level or Equivalent</td>\n",
+       "      <td>0-35</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>Pass</td>\n",
+       "      <td>77.0</td>\n",
+       "      <td>312.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26719</th>\n",
+       "      <td>26744</td>\n",
+       "      <td>2679821</td>\n",
+       "      <td>F</td>\n",
+       "      <td>South East Region</td>\n",
+       "      <td>Lower Than A Level</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Withdrawn</td>\n",
+       "      <td>92.0</td>\n",
+       "      <td>275.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26720</th>\n",
+       "      <td>26745</td>\n",
+       "      <td>2684003</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Yorkshire Region</td>\n",
+       "      <td>HE Qualification</td>\n",
+       "      <td>35-55</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Distinction</td>\n",
+       "      <td>83.0</td>\n",
+       "      <td>616.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>25259 rows × 10 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Unnamed: 0  id_student gender                region  \\\n",
+       "0               0       11391      M   East Anglian Region   \n",
+       "1               1       28400      F              Scotland   \n",
+       "2               2       31604      F     South East Region   \n",
+       "3               3       32885      F  West Midlands Region   \n",
+       "4               4       38053      M                 Wales   \n",
+       "...           ...         ...    ...                   ...   \n",
+       "26716       26741     2620947      F              Scotland   \n",
+       "26717       26742     2645731      F   East Anglian Region   \n",
+       "26718       26743     2648187      F          South Region   \n",
+       "26719       26744     2679821      F     South East Region   \n",
+       "26720       26745     2684003      F      Yorkshire Region   \n",
+       "\n",
+       "           highest_education age_band disability final_result  score  \\\n",
+       "0           HE Qualification     55<=          N         Pass   82.0   \n",
+       "1           HE Qualification    35-55          N         Pass   67.0   \n",
+       "2      A Level or Equivalent    35-55          N         Pass   76.0   \n",
+       "3         Lower Than A Level     0-35          N         Pass   55.0   \n",
+       "4      A Level or Equivalent    35-55          N         Pass   68.0   \n",
+       "...                      ...      ...        ...          ...    ...   \n",
+       "26716  A Level or Equivalent     0-35          Y  Distinction   89.0   \n",
+       "26717     Lower Than A Level    35-55          N  Distinction   89.0   \n",
+       "26718  A Level or Equivalent     0-35          Y         Pass   77.0   \n",
+       "26719     Lower Than A Level    35-55          N    Withdrawn   92.0   \n",
+       "26720       HE Qualification    35-55          N  Distinction   83.0   \n",
+       "\n",
+       "       click_events  \n",
+       "0             934.0  \n",
+       "1            1435.0  \n",
+       "2            2158.0  \n",
+       "3            1034.0  \n",
+       "4            2445.0  \n",
+       "...             ...  \n",
+       "26716         476.0  \n",
+       "26717         893.0  \n",
+       "26718         312.0  \n",
+       "26719         275.0  \n",
+       "26720         616.0  \n",
+       "\n",
+       "[25259 rows x 10 columns]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
-    "# add code here"
+    "# All rows with click_events under 10 will be removed from the Datasetframe\n",
+    "\n",
+    "Datasetframe = Datasetframe.drop(Datasetframe[Datasetframe['click_events'] < 10].index)\n",
+    "# Outputting the Dataset\n",
+    "Datasetframe"
   ]
  },
  {
@@ -99,7 +950,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -115,7 +966,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -131,7 +982,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -147,7 +998,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -163,7 +1014,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -179,7 +1030,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -195,7 +1046,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -212,7 +1063,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3.9.7 64-bit",
+   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
@@ -226,7 +1077,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.13"
  },
  "vscode": {
   "interpreter": {

 %% Cell type:markdown id: tags:

 # UFCFVQ-15-M Programming for Data Science
 # Programming Task 2

-## Student Id:
+## Student Id: 23003188

 %% Cell type:markdown id: tags:

 ### Requirement FR2.1 - Read CSV data from a file (with a header row) into memory

 %% Cell type:code id: tags:

 ``` python
-# add code here
+# importing pandas library
+import pandas as pd
+
+# Reading The First Dataset Using pandas
+Dataset_A = pd.read_csv('task2a.csv')
+
+# Outputting the Dataset
+Dataset_A
 ```

+%% Output
+
+           Unnamed: 0  id_student gender                region  \
+    0               0       11391      M   East Anglian Region
+    1               1       28400      F              Scotland
+    2               2       31604      F     South East Region
+    3               3       32885      F  West Midlands Region
+    4               4       38053      M                 Wales
+    ...           ...         ...    ...                   ...
+    26741       26741     2620947      F              Scotland
+    26742       26742     2645731      F   East Anglian Region
+    26743       26743     2648187      F          South Region
+    26744       26744     2679821      F     South East Region
+    26745       26745     2684003      F      Yorkshire Region
+    
+               highest_education age_band disability final_result  score
+    0           HE Qualification     55<=          N         Pass   82.0
+    1           HE Qualification    35-55          N         Pass   67.0
+    2      A Level or Equivalent    35-55          N         Pass   76.0
+    3         Lower Than A Level     0-35          N         Pass   55.0
+    4      A Level or Equivalent    35-55          N         Pass   68.0
+    ...                      ...      ...        ...          ...    ...
+    26741  A Level or Equivalent     0-35          Y  Distinction   89.0
+    26742     Lower Than A Level    35-55          N  Distinction   89.0
+    26743  A Level or Equivalent     0-35          Y         Pass   77.0
+    26744     Lower Than A Level    35-55          N    Withdrawn   92.0
+    26745       HE Qualification    35-55          N  Distinction   83.0
+    
+    [26746 rows x 9 columns]
+
 %% Cell type:markdown id: tags:

 ### Requirement FR2.2 - Read CSV data from a file (without a header row) into memory

 %% Cell type:code id: tags:

 ``` python
-# add code here
+# Reading The Second Dataset Using pandas
+# The file has no header row, so the column names ['id_student', 'click_events'] are supplied explicitly
+
+Dataset_B = pd.read_csv('task2b.csv', names=['id_student', 'click_events'])
+
+# Outputting the Dataset
+Dataset_B
 ```

+%% Output
+
+           id_student  click_events
+    0            6516        2791.0
+    1            8462         656.0
+    2           11391         934.0
+    3           23629           NaN
+    4           23698         910.0
+    ...           ...           ...
+    26069     2698251        1511.0
+    26070     2698257         758.0
+    26071     2698535        4241.0
+    26072     2698577         717.0
+    26073     2698588         605.0
+    
+    [26074 rows x 2 columns]
+
 %% Cell type:markdown id: tags:

 ### Requirement FR2.3 - Merge the data from two Dataframes

 %% Cell type:code id: tags:

 ``` python
-# add code here
-```
+# merging the two Datasets into one DataFrame
+# Since both have the same column, the merge will be based on the common column 'id_student'
+Datasetframe = pd.merge(Dataset_A, Dataset_B, on=['id_student'])
+
+# Outputting the Dataset
+Datasetframe
+```
+
+%% Output
+
+           Unnamed: 0  id_student gender                region  \
+    0               0       11391      M   East Anglian Region
+    1               1       28400      F              Scotland
+    2               2       31604      F     South East Region
+    3               3       32885      F  West Midlands Region
+    4               4       38053      M                 Wales
+    ...           ...         ...    ...                   ...
+    26716       26741     2620947      F              Scotland
+    26717       26742     2645731      F   East Anglian Region
+    26718       26743     2648187      F          South Region
+    26719       26744     2679821      F     South East Region
+    26720       26745     2684003      F      Yorkshire Region
+    
+               highest_education age_band disability final_result  score  \
+    0           HE Qualification     55<=          N         Pass   82.0
+    1           HE Qualification    35-55          N         Pass   67.0
+    2      A Level or Equivalent    35-55          N         Pass   76.0
+    3         Lower Than A Level     0-35          N         Pass   55.0
+    4      A Level or Equivalent    35-55          N         Pass   68.0
+    ...                      ...      ...        ...          ...    ...
+    26716  A Level or Equivalent     0-35          Y  Distinction   89.0
+    26717     Lower Than A Level    35-55          N  Distinction   89.0
+    26718  A Level or Equivalent     0-35          Y         Pass   77.0
+    26719     Lower Than A Level    35-55          N    Withdrawn   92.0
+    26720       HE Qualification    35-55          N  Distinction   83.0
+    
+           click_events
+    0             934.0
+    1            1435.0
+    2            2158.0
+    3            1034.0
+    4            2445.0
+    ...             ...
+    26716         476.0
+    26717         893.0
+    26718         312.0
+    26719         275.0
+    26720         616.0
+    
+    [26721 rows x 10 columns]

 %% Cell type:markdown id: tags:

 ### Requirement FR2.4 - Remove any rows that contain missing values

 %% Cell type:code id: tags:

 ``` python
-# add code here
+# Viewing how many null values there are before dropping them
+Datasetframe.isnull().sum()
+```
+
+%% Output
+
+    Unnamed: 0              0
+    id_student              0
+    gender                  0
+    region                  0
+    highest_education       0
+    age_band                0
+    disability              0
+    final_result            0
+    score                  19
+    click_events         1371
+    dtype: int64
+
+%% Cell type:code id: tags:
+
+``` python
+# All rows with empty values will be dropped
+Datasetframe = Datasetframe.dropna()
+
+# Viewing how many null values remain after dropping them
+Datasetframe.isnull().sum()
 ```

+%% Output
+
+    Unnamed: 0           0
+    id_student           0
+    gender               0
+    region               0
+    highest_education    0
+    age_band             0
+    disability           0
+    final_result         0
+    score                0
+    click_events         0
+    dtype: int64
+
 %% Cell type:markdown id: tags:

 ### Requirement FR2.5 - Filter out unnecessary rows

 %% Cell type:code id: tags:

 ``` python
-# add code here
-```
+# All rows with click_events under 10 will be removed from Datasetframe
+
+Datasetframe = Datasetframe.drop(Datasetframe[Datasetframe['click_events'] < 10].index)
+# Outputting the Dataset
+Datasetframe
+```
+
+%% Output
+
+           Unnamed: 0  id_student gender                region  \
+    0               0       11391      M   East Anglian Region
+    1               1       28400      F              Scotland
+    2               2       31604      F     South East Region
+    3               3       32885      F  West Midlands Region
+    4               4       38053      M                 Wales
+    ...           ...         ...    ...                   ...
+    26716       26741     2620947      F              Scotland
+    26717       26742     2645731      F   East Anglian Region
+    26718       26743     2648187      F          South Region
+    26719       26744     2679821      F     South East Region
+    26720       26745     2684003      F      Yorkshire Region
+    
+               highest_education age_band disability final_result  score  \
+    0           HE Qualification     55<=          N         Pass   82.0
+    1           HE Qualification    35-55          N         Pass   67.0
+    2      A Level or Equivalent    35-55          N         Pass   76.0
+    3         Lower Than A Level     0-35          N         Pass   55.0
+    4      A Level or Equivalent    35-55          N         Pass   68.0
+    ...                      ...      ...        ...          ...    ...
+    26716  A Level or Equivalent     0-35          Y  Distinction   89.0
+    26717     Lower Than A Level    35-55          N  Distinction   89.0
+    26718  A Level or Equivalent     0-35          Y         Pass   77.0
+    26719     Lower Than A Level    35-55          N    Withdrawn   92.0
+    26720       HE Qualification    35-55          N  Distinction   83.0
+    
+           click_events
+    0             934.0
+    1            1435.0
+    2            2158.0
+    3            1034.0
+    4            2445.0
+    ...             ...
+    26716         476.0
+    26717         893.0
+    26718         312.0
+    26719         275.0
+    26720         616.0
+    
+    [25259 rows x 10 columns]

 %% Cell type:markdown id: tags:

 ### Requirement FR2.6 - Rename the score column

 %% Cell type:code id: tags:

 ``` python
 # add code here
 ```

 %% Cell type:markdown id: tags:

 ### Requirement FR2.7 - Remove unnecessary column(s)

 %% Cell type:code id: tags:

 ``` python
 # add code here
 ```

 %% Cell type:markdown id: tags:

 ### Requirement FR2.8 - Write the DataFrame data to a CSV file

 %% Cell type:code id: tags:

 ``` python
 # add code here
 ```

 %% Cell type:markdown id: tags:

 ### Requirement FR2.9 - Investigate the effects of age-group on attainment and engagement

 %% Cell type:code id: tags:

 ``` python
 # add code here
 ```

 %% Cell type:markdown id: tags:

 ### Requirement FR2.10 - Present the results of the age-group investigation using an appropriate visualisation

 %% Cell type:code id: tags:

 ``` python
 # add code here
 ```

 %% Cell type:markdown id: tags:

 ### Requirement FR2.11 - Investigate the effects of engagement on attainment

 %% Cell type:code id: tags:

 ``` python
 # add code here
 ```

 %% Cell type:markdown id: tags:

 ### Requirement FR2.12 - Test the hypothesis that there is a significant effect on attainment

 %% Cell type:code id: tags:

 ``` python
 # add code here
 ```

 %% Cell type:markdown id: tags:

 # Process Development Report for Task 2