diff --git a/UFCFVQ-15-M Programming Task 1.ipynb b/UFCFVQ-15-M Programming Task 1.ipynb index 217f4c6f34319cb61b681598192e410ed62b0d3d..7709324229324a10a6ff7dba8355f4a0b5729f26 100644 --- a/UFCFVQ-15-M Programming Task 1.ipynb +++ b/UFCFVQ-15-M Programming Task 1.ipynb @@ -70,7 +70,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "The mean of the list = 120.895\n" + "The mean of the list = 120.89453125\n" ] } ], @@ -79,33 +79,15 @@ "def meanFinder(List):\n", " #assining the varivale to be global so it can be called any were else (as it has been called in FR1.7)\n", " global ListMean\n", - " MeanListSum = 0\n", - " MeanListLength = 0\n", - " \n", - " #Find the sum of the list using pure python\n", - " for DataInList in List:\n", - " MeanListSum += DataInList\n", - " \n", - " #Find the Length of the list using pure python \n", - " for DataLength in List:\n", - " MeanListLength += 1\n", " \n", " #Find the mean of the list using the mathematical mean equation\n", - " ListMean = MeanListSum/MeanListLength\n", - " \n", - " #Print The result and round it to 3 decimal places \n", - " print(f'The mean of the list = {round(ListMean,3)}')\n", - "\n", - " ###\n", - " ##Fast Alternative way with pre-built python functions\n", - " ##---\n", - " #meanEq = sum(List)/len(List)\n", - " #return meanEq\n", - " ##---\n", - " ###\n", + " ListMean = sum(List)/len(List)\n", " \n", + " #Rturn The result\n", + " return ListMean\n", + "\n", "#Calling the function and passing the >> List << argument to it \n", - "meanFinder(List)" + "print(f'The mean of the list = {meanFinder(List)}')" ] }, { @@ -132,37 +114,24 @@ "name": "stdout", "output_type": "stream", "text": [ - "Standard Devision of the list = 31.952\n" + "Standard Devision of the list = 31.95179590820272\n" ] } ], "source": [ "#Function to find the standard deviation\n", - "def StdDevFinder(List):\n", - " #assining the varivale to be global so it can be called any were else (as it has been called in FR1.7)\n", - " global 
STD_DEV\n", - " \n", - " \n", - " #Find the sum of the list using pre-bulid python function (len)\n", - " StdListSum = sum(List)\n", - " \n", - " #Find the length of the list using pre-bulid python function (len)\n", - " StdListLength = len(List)\n", - " \n", - " #Find the mean of the list\n", - " StdListMean = StdListSum/StdListLength\n", - " \n", + "def StdDevFinder(List): \n", " #Find the variance among the list using its mathematical equation\n", " # (Square each deviation from the mean & Calculate the variance)\n", - " StdVariance = sum((DataInList - StdListMean)**2 for DataInList in List) / StdListLength\n", + " StdVariance = sum((DataInList - ListMean)**2 for DataInList in List) / len(List)\n", " STD_DEV = StdVariance ** 0.5\n", " \n", - " #Print The result and round it to 3 decimal places \n", - " print(f'Standard Devision of the list = {round(STD_DEV,3)}')\n", + " #Rturn The result\n", + " return STD_DEV\n", "\n", "\n", "#Calling the function and passing the >> List << argument to it \n", - "StdDevFinder(List)" + "print(f'Standard Devision of the list = {StdDevFinder(List)}')" ] }, { @@ -190,55 +159,27 @@ "output_type": "stream", "text": [ "The minimum number in the list = 0\n", - "The max number in the list = 199\n" + "The maximum in the list = 199\n" ] } ], "source": [ "#Function to find the minimum value in a python list\n", "def MinFinder(List):\n", - " #set a variable that carry the first number in the list to compare it with other numbers in the list\n", - " ListMin=List[0]\n", - " #For loop to accsses each number in the list\n", - " for numbers in range(1,len(List)):\n", - " #If the new number is smaller than the next number in the list, assign it as (ListMin) 'the new smallest number'\n", - " if List[numbers] <ListMin:\n", - " ListMin=List[numbers] \n", - " #Print out the smallest number \n", - " print(f'The minimum number in the list = {ListMin}')\n", - " \n", - " ###\n", - " ##Fast Alternative way with pre-built python functions\n", - " ##---\n", 
- " #ListMin = min(List)\n", - " #return ListMin\n", - " ##---\n", - " ###\n", + " ListMin = min(List)\n", + " return ListMin\n", + "\n", "\n", "#Function to find the maximum value in a python list\n", "def MaxFinder(List):\n", - " #set a variable that carry the first number in the list to compare it with other numbers in the list\n", - " ListMax=List[0]\n", - " #For loop to accsses each number in the list\n", - " for numbers in range(1,len(List)):\n", - " #If the new number is bigger than the next number in the list, assign it as (ListMax) 'the new largest number'\n", - " if(List[numbers]>ListMax):\n", - " ListMax=List[numbers]\n", - " #Print out the largest number \n", - " print(f'The max number in the list = {ListMax}')\n", - " \n", - " ###\n", - " ##Fast Alternative way with pre-build python functions\n", - " ##---\n", - " #ListMax = max(List)\n", - " #return ListMax\n", - " ##---\n", - " ###\n", - " \n", + " ListMax = max(List)\n", + " return ListMax\n", + "\n", " \n", "#Calling the functions and passing the >> List << argument to it \n", - "MinFinder(List)\n", - "MaxFinder(List)" + "print(f'The minimum number in the list = {MinFinder(List)}')\n", + "\n", + "print(f'The maximum in the list = {MaxFinder(List)}')\n" ] }, { @@ -419,7 +360,7 @@ "TabularDic = {\n", " 'Count':len(List),\n", " 'Mean':round(ListMean,3),\n", - " 'StdDev':round(STD_DEV,3),\n", + " 'StdDev':round(StdDevFinder(List),3),\n", " 'Minimum':min(List),\n", " '25 percentile':percentileOf_25,\n", " '50 percentile':percentileOf_50,\n", @@ -457,7 +398,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[+] Memory Size >>> 3.638 (KB) | 3638 Bytes | 0.003638 (MB)\n", + "[+] File Size >>> 3.638 (KB) | 3638 Bytes | 0.003638 (MB)\n", "\n", "[+] The data type is <class 'list'> and the numbers inside is <class 'int'>\n", "\n", @@ -480,14 +421,14 @@ "def getSize(file):\n", " file.seek(0,2) #To count from the from the start of the file, and stop at the end.\n", " FileSize = file.tell() # To get 
file position\n", - " #Print file size in memory\n", - " print(f'[+] Memory Size >>> {FileSize*0.001} (KB) | {FileSize} Bytes | {round(FileSize*0.000001,6)} (MB)\\n')\n", + " #Print file size\n", + " print(f'[+] File Size >>> {FileSize*0.001} (KB) | {FileSize} Bytes | {round(FileSize*0.000001,6)} (MB)\\n')\n", " \n", "FileName = open('task1.dat','r')\n", "getSize(FileName)\n", "\n", "print(f'[+] The data type is {type(List)} and the numbers inside is {type(List[0])}\\n')\n", - "print(f\"[+] The file content as shown:\\n\\n{List}\")\n" + "print(f\"[+] The file content as shown:\\n\\n{List[0:]}\")\n" ] }, { @@ -505,12 +446,87 @@ "### Requirement FR1.9 - Design and build a data structure to store CSV data in memory" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This task was created by following the instructions given in Appendix A (output structure).\n", + "For better readability, the printed output is limited to the first 10 values of each column." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Explanation 1: Example for \"EachClmToList\"\n", + "\n", + "['A','B','C'],\n", + "\n", + "['1','2','3'],\n", + "\n", + "['4','5','6'], and so on..\n", + "\n", + "Therefore ColumnHeader = ['A','B','C'] and EachClmToList[0] = ['1','4'..]\n", + "\n", + "##### Explanation 2\n", + "A for loop is created to reach every single value inside 'AppendTolist' over the range of the data length,\n", + "(768), or (767) if we count from 0. To be able to do so, two nested for loops are needed, as shown: one to count the number of\n", + "columns, which is (9), or 8 if we count from zero, and the other to count the number of rows.\n", + "Therefore, it will start from [0][0].. [1][0]..
[2][0] until it reaches [767][0], and then switches to [0][1]..\n", + "and continues until it reaches the last value in the file, which is at [767][8].\n", + "While doing so, the data (numbers) are appended to a new list called 'EachClmToList'.\n", + "\n", + "##### Explanation 3\n", + "A slicing operation creates a sublist for each column (all the values under one header) to achieve Explanation 1. Therefore,\n", + "at the end of every column a new list is started for the next column, and at the end we have a total of 9 lists,\n", + "each of which carries one column of data. " + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "CsvFile = open('task1.csv','r')\n", + "AppendTolist = [] #Creating an empty list to append all the CSV data inside it\n", + "ColumnHeader = [] #Creating an empty list to append the CSV Column Header (Name of each column)\n", + "\n", + "#Refer to Explanation (1)\n", + "EachClmToList = [] #Creating an empty list to append Each column-row (each data under each line for the header ) \n", + "\n", + "'''\n", + "For loop to read the data inside the CSV file and remove all (\\n). therefore all data will be on the same line\n", + "but split by a (,) so each line is a sublist, eg. [[Row 1],[Row 2],[Row 3]..]\n", + "'''\n", + "for data in CsvFile: \n", + " data = data.replace('\\n','').split(',')\n", + " AppendTolist.append(data) #Append (adding) the new data into the 'AppendTolist' list\n", + "#print(AppendTolist[0:5])\n", + "\n", + "#For loop to append only the first list >> [0] << of the 'AppendTolist', which is the columns header to another list.\n", + "for subList in AppendTolist[0]: \n", + " ColumnHeader.append(subList) #append to another list.
\n", + "#Removing the appended it from the main list 'AppendTolist'\n", + "AppendTolist.remove(AppendTolist[0])\n", + "\n", + "#Refer to Explanation (2) \n", + "for clmLen in range(len(ColumnHeader)): #9 aka 8 as it starts from 0\n", + " #print(ColumnHeader)\n", + " for RowLen in range(len(AppendTolist)):#768 aka 767 as it satrts from 0 \n", + " #print(AppendTolist[767][8])\n", + " EachClmToList.append(AppendTolist[RowLen][clmLen]) \n", + "#print(EachClmToList)\n", + "\n", + "#to avoid python errors, a new variable is created called 'numberOfDataInEach' that carries row length in each line.\n", + "numberOfDataInEach = len(AppendTolist)\n", + "\n", + "#Refer to Explanation (3)\n", + "EachClmToList = [EachClmToList[x:x+numberOfDataInEach] for x in range(0, len(EachClmToList), numberOfDataInEach)]\n", + "\n", + "\n", + "#As per as requested the data is stored in the memory and can be called later for analyzing" + ] }, { "cell_type": "markdown", @@ -529,10 +545,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\"Pregnancies\" : [6.0, 1.0, 8.0, 1.0, 0.0, 5.0, 3.0, 10.0, 2.0, 8.0]\n", + "\"Glucose\" : [148.0, 85.0, 183.0, 89.0, 137.0, 116.0, 78.0, 115.0, 197.0, 125.0]\n", + "\"BloodPressure\" : [72.0, 66.0, 64.0, 66.0, 40.0, 74.0, 50.0, 0.0, 70.0, 96.0]\n", + "\"SkinThickness\" : [35.0, 29.0, 0.0, 23.0, 35.0, 0.0, 32.0, 0.0, 45.0, 0.0]\n", + "\"Insulin\" : [0.0, 0.0, 0.0, 94.0, 168.0, 0.0, 88.0, 0.0, 543.0, 0.0]\n", + "\"BMI\" : [33.6, 26.6, 23.3, 28.1, 43.1, 25.6, 31.0, 35.3, 30.5, 0.0]\n", + "\"DiabetesPedigreeFunction\" : [0.627, 0.351, 0.672, 0.167, 2.288, 0.201, 0.248, 0.134, 0.158, 0.232]\n", + "\"Age\" : [50.0, 31.0, 32.0, 21.0, 33.0, 30.0, 26.0, 29.0, 53.0, 54.0]\n", + "\"Outcome\" : [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0]\n" + ] + } + ], + "source": [ + "#Creating function to construct the 
data structure and to call it after. \n", + "def dataStrcuture():\n", + " for x in range(len(ColumnHeader)):\n", + " print(f'\"{ColumnHeader[x]}\" : {list(map(float,EachClmToList[x][:10]))}') #numbers has been converted to float\n", + " #Only 10 data have been printed to better visualize the result, for the whole result remove [:10], [:] or none\n", + " #list(map(float,EachClmToList[x][:10]))\n", + "dataStrcuture()" + ] }, { "cell_type": "markdown", @@ -551,16 +591,63 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 91, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\"Stats\" : [\"Mean\",\"Stdev\", \"Min\", Max\",\"25%\",\"50%\",\"75%\"]\n", + "\n", + "\"Pregnancies\" : [3.845, 3.367, 0.0, 17.0, 1.0, 3.0, 6.0]\n", + "\"Glucose\" : [120.895, 31.952, 0.0, 199.0, 99.0, 117.0, 141.0]\n", + "\"BloodPressure\" : [69.105, 19.343, 0.0, 122.0, 62.0, 72.0, 80.0]\n", + "\"SkinThickness\" : [20.536, 15.942, 0.0, 99.0, 0.0, 23.0, 32.0]\n", + "\"Insulin\" : [79.799, 115.169, 0.0, 846.0, 0.0, 32.0, 128.0]\n", + "\"BMI\" : [31.993, 7.879, 0.0, 67.1, 27.3, 32.0, 36.6]\n", + "\"DiabetesPedigreeFunction\" : [0.472, 0.331, 0.078, 2.42, 0.244, 0.374, 0.627]\n", + "\"Age\" : [33.241, 11.753, 21.0, 81.0, 24.0, 29.0, 41.0]\n", + "\"Outcome\" : [0.349, 0.477, 0.0, 1.0, 0.0, 0.0, 1.0]\n" + ] + } + ], + "source": [ + "print('\"Stats\" : [\"Mean\",\"Stdev\", \"Min\", Max\",\"25%\",\"50%\",\"75%\"]\\n')\n", + "#Calling the file \n", + "def dataStrcuture():\n", + " global data\n", + " #Creating an empty list to append Each column-row (each data under each line for the header ) \n", + " ConvertdeToFloat = []\n", + "\n", + " #For loop to convet rto float in a way that it can be called later in other tasks.\n", + " for x in range(len(ColumnHeader)):\n", + " EachClmInListToFloat = list(map(float,EachClmToList[x][:]))\n", + " ConvertdeToFloat.append(EachClmInListToFloat)\n", + " \n", + " 
\n", + " #Creating variables that contains the required information to create the data structure\n", + " #and calling function that has been saved in memory before. \n", + " \n", + " for x in range(len(ColumnHeader)):\n", + " data = [round(meanFinder(ConvertdeToFloat[x][:]),3),\n", + " round(StdDevFinder((ConvertdeToFloat[x][:])),3),\n", + " MinFinder(ConvertdeToFloat[x][:]),\n", + " MaxFinder(ConvertdeToFloat[x][:]),\n", + " percentileFinder(ConvertdeToFloat[x][:],25), \n", + " percentileFinder(ConvertdeToFloat[x][:],50),\n", + " percentileFinder(ConvertdeToFloat[x][:],75)\n", + " ]\n", + " print(f'\"{ColumnHeader[x]}\" : {data}')\n", + " \n", + "dataStrcuture()" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "##### MARK: %\n", + "# MARK: %\n", "#### FEEDBACK: " ] }, @@ -573,10 +660,75 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 94, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "*************************************************************************************************************************\n", + " Status Mean Stdev Min Max 25% 50% 75% \n", + "*************************************************************************************************************************\n", + " Pregnancies 3.845 3.367 17.0 0.0 1.0 3.0 6.0\n", + " Glucose 120.895 31.952 199.0 0.0 99.0 117.0 141.0\n", + " BloodPressure 69.105 19.343 122.0 0.0 62.0 72.0 80.0\n", + " SkinThickness 20.536 15.942 99.0 0.0 0.0 23.0 32.0\n", + " Insulin 79.799 115.169 846.0 0.0 0.0 32.0 128.0\n", + " BMI 31.993 7.879 67.1 0.0 27.3 32.0 36.6\n", + " DiabPedFnc 0.472 0.331 2.42 0.078 0.244 0.374 0.627\n", + " Age 33.241 11.753 81.0 21.0 24.0 29.0 41.0\n", + " Outcome 0.349 0.477 1.0 0.0 0.0 0.0 1.0\n", + "*************************************************************************************************************************\n" + ] + } + ], + "source": [ + "#Calling the file 
\n",
+    "def dataStrcuture():\n",
+    "    '''Print mean, stdev, min, max and quartiles of every column in EachClmToList as an aligned table.'''\n",
+    "    newList = [element.replace('DiabetesPedigreeFunction', 'DiabPedFnc') for element in ColumnHeader] #the full name is wider than a 15-character column\n",
+    "    clmName = [newList]\n",
+    "    Status = [\"Mean\",\"Stdev\", \"Min\", \"Max\",\"25%\",\"50%\",\"75%\"]\n",
+    "    format_row = \"{:>15}\" * (len(Status)+1) #one shared format so the header lines up with the data rows\n",
+    "    print(stars.rjust(121,'*'))\n",
+    "    print(format_row.format(\"Status\", *Status))\n",
+    "    print(stars.rjust(121,'*'))\n",
+    "\n",
+    "    ConvertdeToFloat = []\n",
+    "\n",
+    "    #Convert every column from strings to floats so the statistics functions receive numbers;\n",
+    "    #ConvertdeToFloat[x] then holds the values that belong to ColumnHeader[x].\n",
+    "\n",
+    "    for x in range(len(ColumnHeader)):\n",
+    "        EachClmInListToFloat = list(map(float,EachClmToList[x][:]))\n",
+    "        ConvertdeToFloat.append(EachClmInListToFloat)\n",
+    "\n",
+    "    #Build one row of statistics per column; the values must follow the order of the labels in Status.\n",
+    "    #meanFinder is called first because StdDevFinder reads the global ListMean that meanFinder sets.\n",
+    "\n",
+    "    for x in range(len(ColumnHeader)):\n",
+    "\n",
+    "        data = [[round(meanFinder(ConvertdeToFloat[x][:]),3),\n",
+    "                 round(StdDevFinder((ConvertdeToFloat[x][:])),3),\n",
+    "                 MinFinder(ConvertdeToFloat[x][:]),\n",
+    "                 MaxFinder(ConvertdeToFloat[x][:]),\n",
+    "                 percentileFinder(ConvertdeToFloat[x][:],25),\n",
+    "                 percentileFinder(ConvertdeToFloat[x][:],50),\n",
+    "                 percentileFinder(ConvertdeToFloat[x][:],75)\n",
+    "                 ]]\n",
+    "\n",
+    "        #clmName and data each hold exactly one inner list, so zip yields a single (names, row) pair.\n",
+    "\n",
+    "        for names, row in zip(clmName, data):\n",
+    "            #x picks the name of the column whose statistics are in row\n",
+    "            print(format_row.format(names[x], *row))\n",
+    "\n",
+    "\n",
+    "stars = '*' #defined before the call because dataStrcuture() reads it as a global\n",
+    "dataStrcuture()\n",
+    "print(stars.rjust(121,'*'))"
+   ]
   },
   {
    "cell_type": "markdown",