diff --git a/UFCFVQ-15-M Programming Task 2.ipynb b/UFCFVQ-15-M Programming Task 2.ipynb index f4e96a85eb1666312cf513283654a07477eb78a1..cbb343ba7f709b4b0370562fda29d8a36cadb53a 100644 --- a/UFCFVQ-15-M Programming Task 2.ipynb +++ b/UFCFVQ-15-M Programming Task 2.ipynb @@ -7,7 +7,7 @@ "# UFCFVQ-15-M Programming for Data Science\n", "# Programming Task 2\n", "\n", - "## Student Id: " + "## Student Id: 23003188" ] }, { @@ -19,11 +19,223 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Unnamed: 0</th>\n", + " <th>id_student</th>\n", + " <th>gender</th>\n", + " <th>region</th>\n", + " <th>highest_education</th>\n", + " <th>age_band</th>\n", + " <th>disability</th>\n", + " <th>final_result</th>\n", + " <th>score</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>11391</td>\n", + " <td>M</td>\n", + " <td>East Anglian Region</td>\n", + " <td>HE Qualification</td>\n", + " <td>55<=</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>82.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>28400</td>\n", + " <td>F</td>\n", + " <td>Scotland</td>\n", + " <td>HE Qualification</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>67.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>31604</td>\n", + " <td>F</td>\n", + " <td>South East Region</td>\n", + " <td>A Level or Equivalent</td>\n", + " 
<td>35-55</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>76.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>32885</td>\n", + " <td>F</td>\n", + " <td>West Midlands Region</td>\n", + " <td>Lower Than A Level</td>\n", + " <td>0-35</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>55.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>38053</td>\n", + " <td>M</td>\n", + " <td>Wales</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>68.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26741</th>\n", + " <td>26741</td>\n", + " <td>2620947</td>\n", + " <td>F</td>\n", + " <td>Scotland</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>0-35</td>\n", + " <td>Y</td>\n", + " <td>Distinction</td>\n", + " <td>89.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26742</th>\n", + " <td>26742</td>\n", + " <td>2645731</td>\n", + " <td>F</td>\n", + " <td>East Anglian Region</td>\n", + " <td>Lower Than A Level</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Distinction</td>\n", + " <td>89.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26743</th>\n", + " <td>26743</td>\n", + " <td>2648187</td>\n", + " <td>F</td>\n", + " <td>South Region</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>0-35</td>\n", + " <td>Y</td>\n", + " <td>Pass</td>\n", + " <td>77.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26744</th>\n", + " <td>26744</td>\n", + " <td>2679821</td>\n", + " <td>F</td>\n", + " <td>South East Region</td>\n", + " <td>Lower Than A Level</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Withdrawn</td>\n", + " <td>92.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26745</th>\n", + 
" <td>26745</td>\n", + " <td>2684003</td>\n", + " <td>F</td>\n", + " <td>Yorkshire Region</td>\n", + " <td>HE Qualification</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Distinction</td>\n", + " <td>83.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>26746 rows × 9 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Unnamed: 0 id_student gender region \\\n", + "0 0 11391 M East Anglian Region \n", + "1 1 28400 F Scotland \n", + "2 2 31604 F South East Region \n", + "3 3 32885 F West Midlands Region \n", + "4 4 38053 M Wales \n", + "... ... ... ... ... \n", + "26741 26741 2620947 F Scotland \n", + "26742 26742 2645731 F East Anglian Region \n", + "26743 26743 2648187 F South Region \n", + "26744 26744 2679821 F South East Region \n", + "26745 26745 2684003 F Yorkshire Region \n", + "\n", + " highest_education age_band disability final_result score \n", + "0 HE Qualification 55<= N Pass 82.0 \n", + "1 HE Qualification 35-55 N Pass 67.0 \n", + "2 A Level or Equivalent 35-55 N Pass 76.0 \n", + "3 Lower Than A Level 0-35 N Pass 55.0 \n", + "4 A Level or Equivalent 35-55 N Pass 68.0 \n", + "... ... ... ... ... ... 
\n", + "26741 A Level or Equivalent 0-35 Y Distinction 89.0 \n", + "26742 Lower Than A Level 35-55 N Distinction 89.0 \n", + "26743 A Level or Equivalent 0-35 Y Pass 77.0 \n", + "26744 Lower Than A Level 35-55 N Withdrawn 92.0 \n", + "26745 HE Qualification 35-55 N Distinction 83.0 \n", + "\n", + "[26746 rows x 9 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# add code here" + "# importing pandas library\n", + "import pandas as pd\n", + "\n", + "# Reading The First Dataset Using pandas\n", + "Dataset_A = pd.read_csv('task2a.csv')\n", + "\n", + "# Outputing the Dataset\n", + "Dataset_A" ] }, { @@ -35,11 +247,125 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>id_student</th>\n", + " <th>click_events</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>6516</td>\n", + " <td>2791.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>8462</td>\n", + " <td>656.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>11391</td>\n", + " <td>934.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>23629</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>23698</td>\n", + " <td>910.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26069</th>\n", + " <td>2698251</td>\n", + " 
<td>1511.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26070</th>\n", + " <td>2698257</td>\n", + " <td>758.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26071</th>\n", + " <td>2698535</td>\n", + " <td>4241.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26072</th>\n", + " <td>2698577</td>\n", + " <td>717.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26073</th>\n", + " <td>2698588</td>\n", + " <td>605.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>26074 rows × 2 columns</p>\n", + "</div>" + ], + "text/plain": [ + " id_student click_events\n", + "0 6516 2791.0\n", + "1 8462 656.0\n", + "2 11391 934.0\n", + "3 23629 NaN\n", + "4 23698 910.0\n", + "... ... ...\n", + "26069 2698251 1511.0\n", + "26070 2698257 758.0\n", + "26071 2698535 4241.0\n", + "26072 2698577 717.0\n", + "26073 2698588 605.0\n", + "\n", + "[26074 rows x 2 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# add code here" + "# Reading The Second Dataset Using pandas\n", + "# Adding Header Names ['id_student', 'click_events'] to First Row\n", + "\n", + "Dataset_B = pd.read_csv('task2b.csv', names=['id_student', 'click_events'])\n", + "\n", + "# Outputing the Dataset\n", + "Dataset_B" ] }, { @@ -51,11 +377,246 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Unnamed: 0</th>\n", + " <th>id_student</th>\n", + " <th>gender</th>\n", + " <th>region</th>\n", + " <th>highest_education</th>\n", + " 
<th>age_band</th>\n", + " <th>disability</th>\n", + " <th>final_result</th>\n", + " <th>score</th>\n", + " <th>click_events</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>11391</td>\n", + " <td>M</td>\n", + " <td>East Anglian Region</td>\n", + " <td>HE Qualification</td>\n", + " <td>55<=</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>82.0</td>\n", + " <td>934.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>28400</td>\n", + " <td>F</td>\n", + " <td>Scotland</td>\n", + " <td>HE Qualification</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>67.0</td>\n", + " <td>1435.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>31604</td>\n", + " <td>F</td>\n", + " <td>South East Region</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>76.0</td>\n", + " <td>2158.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>32885</td>\n", + " <td>F</td>\n", + " <td>West Midlands Region</td>\n", + " <td>Lower Than A Level</td>\n", + " <td>0-35</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>55.0</td>\n", + " <td>1034.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>38053</td>\n", + " <td>M</td>\n", + " <td>Wales</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>68.0</td>\n", + " <td>2445.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26716</th>\n", + " <td>26741</td>\n", + " <td>2620947</td>\n", + " <td>F</td>\n", + " <td>Scotland</td>\n", + " <td>A 
Level or Equivalent</td>\n", + " <td>0-35</td>\n", + " <td>Y</td>\n", + " <td>Distinction</td>\n", + " <td>89.0</td>\n", + " <td>476.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26717</th>\n", + " <td>26742</td>\n", + " <td>2645731</td>\n", + " <td>F</td>\n", + " <td>East Anglian Region</td>\n", + " <td>Lower Than A Level</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Distinction</td>\n", + " <td>89.0</td>\n", + " <td>893.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26718</th>\n", + " <td>26743</td>\n", + " <td>2648187</td>\n", + " <td>F</td>\n", + " <td>South Region</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>0-35</td>\n", + " <td>Y</td>\n", + " <td>Pass</td>\n", + " <td>77.0</td>\n", + " <td>312.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26719</th>\n", + " <td>26744</td>\n", + " <td>2679821</td>\n", + " <td>F</td>\n", + " <td>South East Region</td>\n", + " <td>Lower Than A Level</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Withdrawn</td>\n", + " <td>92.0</td>\n", + " <td>275.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26720</th>\n", + " <td>26745</td>\n", + " <td>2684003</td>\n", + " <td>F</td>\n", + " <td>Yorkshire Region</td>\n", + " <td>HE Qualification</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Distinction</td>\n", + " <td>83.0</td>\n", + " <td>616.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>26721 rows × 10 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Unnamed: 0 id_student gender region \\\n", + "0 0 11391 M East Anglian Region \n", + "1 1 28400 F Scotland \n", + "2 2 31604 F South East Region \n", + "3 3 32885 F West Midlands Region \n", + "4 4 38053 M Wales \n", + "... ... ... ... ... 
\n", + "26716 26741 2620947 F Scotland \n", + "26717 26742 2645731 F East Anglian Region \n", + "26718 26743 2648187 F South Region \n", + "26719 26744 2679821 F South East Region \n", + "26720 26745 2684003 F Yorkshire Region \n", + "\n", + " highest_education age_band disability final_result score \\\n", + "0 HE Qualification 55<= N Pass 82.0 \n", + "1 HE Qualification 35-55 N Pass 67.0 \n", + "2 A Level or Equivalent 35-55 N Pass 76.0 \n", + "3 Lower Than A Level 0-35 N Pass 55.0 \n", + "4 A Level or Equivalent 35-55 N Pass 68.0 \n", + "... ... ... ... ... ... \n", + "26716 A Level or Equivalent 0-35 Y Distinction 89.0 \n", + "26717 Lower Than A Level 35-55 N Distinction 89.0 \n", + "26718 A Level or Equivalent 0-35 Y Pass 77.0 \n", + "26719 Lower Than A Level 35-55 N Withdrawn 92.0 \n", + "26720 HE Qualification 35-55 N Distinction 83.0 \n", + "\n", + " click_events \n", + "0 934.0 \n", + "1 1435.0 \n", + "2 2158.0 \n", + "3 1034.0 \n", + "4 2445.0 \n", + "... ... \n", + "26716 476.0 \n", + "26717 893.0 \n", + "26718 312.0 \n", + "26719 275.0 \n", + "26720 616.0 \n", + "\n", + "[26721 rows x 10 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# add code here" + "# Merging the two datasets into one DataFrame\n", + "# Since both have the same column, the merge will be based on the common column 'id_student'\n", + "Datasetframe = pd.merge(Dataset_A, Dataset_B, on=['id_student'])\n", + "\n", + "# Outputting the Dataset\n", + "Datasetframe" ] }, { @@ -67,11 +628,67 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Unnamed: 0 0\n", + "id_student 0\n", + "gender 0\n", + "region 0\n", + "highest_education 0\n", + "age_band 0\n", + "disability 0\n", + "final_result 0\n", + "score 19\n", + "click_events 1371\n", + "dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, +
"output_type": "execute_result" + } + ], "source": [ - "# add code here" + "# Viewing how many null values there are before dropping them\n", + "Datasetframe.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Unnamed: 0 0\n", + "id_student 0\n", + "gender 0\n", + "region 0\n", + "highest_education 0\n", + "age_band 0\n", + "disability 0\n", + "final_result 0\n", + "score 0\n", + "click_events 0\n", + "dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# All rows with empty values will be dropped\n", + "Datasetframe = Datasetframe.dropna()\n", + "\n", + "# Viewing how many null values there are after dropping them\n", + "Datasetframe.isnull().sum()" ] }, { @@ -83,11 +700,245 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Unnamed: 0</th>\n", + " <th>id_student</th>\n", + " <th>gender</th>\n", + " <th>region</th>\n", + " <th>highest_education</th>\n", + " <th>age_band</th>\n", + " <th>disability</th>\n", + " <th>final_result</th>\n", + " <th>score</th>\n", + " <th>click_events</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>11391</td>\n", + " <td>M</td>\n", + " <td>East Anglian Region</td>\n", + " <td>HE Qualification</td>\n", + " <td>55<=</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>82.0</td>\n",
+ " <td>934.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>28400</td>\n", + " <td>F</td>\n", + " <td>Scotland</td>\n", + " <td>HE Qualification</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>67.0</td>\n", + " <td>1435.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>31604</td>\n", + " <td>F</td>\n", + " <td>South East Region</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>76.0</td>\n", + " <td>2158.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>32885</td>\n", + " <td>F</td>\n", + " <td>West Midlands Region</td>\n", + " <td>Lower Than A Level</td>\n", + " <td>0-35</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>55.0</td>\n", + " <td>1034.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>38053</td>\n", + " <td>M</td>\n", + " <td>Wales</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Pass</td>\n", + " <td>68.0</td>\n", + " <td>2445.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26716</th>\n", + " <td>26741</td>\n", + " <td>2620947</td>\n", + " <td>F</td>\n", + " <td>Scotland</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>0-35</td>\n", + " <td>Y</td>\n", + " <td>Distinction</td>\n", + " <td>89.0</td>\n", + " <td>476.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26717</th>\n", + " <td>26742</td>\n", + " <td>2645731</td>\n", + " <td>F</td>\n", + " <td>East Anglian Region</td>\n", + " <td>Lower Than A Level</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Distinction</td>\n", + " <td>89.0</td>\n", + 
" <td>893.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26718</th>\n", + " <td>26743</td>\n", + " <td>2648187</td>\n", + " <td>F</td>\n", + " <td>South Region</td>\n", + " <td>A Level or Equivalent</td>\n", + " <td>0-35</td>\n", + " <td>Y</td>\n", + " <td>Pass</td>\n", + " <td>77.0</td>\n", + " <td>312.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26719</th>\n", + " <td>26744</td>\n", + " <td>2679821</td>\n", + " <td>F</td>\n", + " <td>South East Region</td>\n", + " <td>Lower Than A Level</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Withdrawn</td>\n", + " <td>92.0</td>\n", + " <td>275.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26720</th>\n", + " <td>26745</td>\n", + " <td>2684003</td>\n", + " <td>F</td>\n", + " <td>Yorkshire Region</td>\n", + " <td>HE Qualification</td>\n", + " <td>35-55</td>\n", + " <td>N</td>\n", + " <td>Distinction</td>\n", + " <td>83.0</td>\n", + " <td>616.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>25259 rows × 10 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Unnamed: 0 id_student gender region \\\n", + "0 0 11391 M East Anglian Region \n", + "1 1 28400 F Scotland \n", + "2 2 31604 F South East Region \n", + "3 3 32885 F West Midlands Region \n", + "4 4 38053 M Wales \n", + "... ... ... ... ... \n", + "26716 26741 2620947 F Scotland \n", + "26717 26742 2645731 F East Anglian Region \n", + "26718 26743 2648187 F South Region \n", + "26719 26744 2679821 F South East Region \n", + "26720 26745 2684003 F Yorkshire Region \n", + "\n", + " highest_education age_band disability final_result score \\\n", + "0 HE Qualification 55<= N Pass 82.0 \n", + "1 HE Qualification 35-55 N Pass 67.0 \n", + "2 A Level or Equivalent 35-55 N Pass 76.0 \n", + "3 Lower Than A Level 0-35 N Pass 55.0 \n", + "4 A Level or Equivalent 35-55 N Pass 68.0 \n", + "... ... ... ... ... ... 
\n", + "26716 A Level or Equivalent 0-35 Y Distinction 89.0 \n", + "26717 Lower Than A Level 35-55 N Distinction 89.0 \n", + "26718 A Level or Equivalent 0-35 Y Pass 77.0 \n", + "26719 Lower Than A Level 35-55 N Withdrawn 92.0 \n", + "26720 HE Qualification 35-55 N Distinction 83.0 \n", + "\n", + " click_events \n", + "0 934.0 \n", + "1 1435.0 \n", + "2 2158.0 \n", + "3 1034.0 \n", + "4 2445.0 \n", + "... ... \n", + "26716 476.0 \n", + "26717 893.0 \n", + "26718 312.0 \n", + "26719 275.0 \n", + "26720 616.0 \n", + "\n", + "[25259 rows x 10 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# add code here" + "# All rows with click_events under 10 will be removed from the DataFrame\n", + "\n", + "Datasetframe = Datasetframe.drop(Datasetframe[Datasetframe['click_events'] < 10].index)\n", + "# Outputting the Dataset\n", + "Datasetframe" ] }, { @@ -99,7 +950,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -115,7 +966,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -131,7 +982,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -147,7 +998,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -163,7 +1014,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -179,7 +1030,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -195,7 +1046,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -212,7 +1063,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.9.7 64-bit", +
"display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -226,7 +1077,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.9.13" }, "vscode": { "interpreter": {