From 367168400224b59040daa0ef34e5d0c1b872cdcb Mon Sep 17 00:00:00 2001 From: am2-liyanaarac <akash2.liyanaarachchi@live.uwe.ac.uk> Date: Tue, 4 Apr 2023 15:37:19 +0100 Subject: [PATCH] FR6 completed but padding changing with different column sizes --- UFCFVQ-15-M_Python_Programming_Template.ipynb | 158 +++++++++++++----- 1 file changed, 117 insertions(+), 41 deletions(-) diff --git a/UFCFVQ-15-M_Python_Programming_Template.ipynb b/UFCFVQ-15-M_Python_Programming_Template.ipynb index 0dbc269..bb72a2e 100644 --- a/UFCFVQ-15-M_Python_Programming_Template.ipynb +++ b/UFCFVQ-15-M_Python_Programming_Template.ipynb @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 29, "metadata": { "deletable": false }, @@ -65,28 +65,21 @@ } ], "source": [ - "def geometric_mean(numbers):\n", + "def geometric_mean(num_lst):\n", " \"\"\"\n", " Calculate the geometric mean of a list of numbers.\n", - "\n", - " Parameters:\n", - " numbers (list): A list of positive integers or floats.\n", - "\n", - " Returns:\n", - " float: The geometric mean of the numbers in the list.\n", + " :param num_lst: A list of positive integers or floats.\n", + " :returns float: The geometric mean of the numbers in the list.\n", " \"\"\"\n", " # Initialize the product of the numbers\n", " product = 1\n", - " # Length of the list\n", - " num_elements = len(numbers)\n", "\n", " # Iterate through the list of numbers and multiply each number\n", - " for number in numbers:\n", + " for number in num_lst:\n", " product *= number\n", "\n", - " # Calculate the nth root of the product\n", - " geometric_mean_result = product ** (1 / num_elements)\n", - " return geometric_mean_result\n", + " # Calculate the nth root of the product and return it\n", + " return product ** (1 / len(num_lst))\n", "\n", "# Test the function with the provided list of numbers\n", "test_number_list = [64, 9, 90, 28, 46, 95, 34, 28, 86, 62, 14, 77, 99, 80,\n", @@ -119,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 30, "metadata": { "deletable": false }, @@ -208,7 +201,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 31, "metadata": { "deletable": false }, @@ -222,7 +215,7 @@ } ], "source": [ - "def read_all_csv_data(file_name, conversion_indicators) -> dict:\n", + "def read_all_csv_data(file_name, conversion_indicators):\n", " \"\"\"\n", " This function will return all columns data as a dictionary\n", " :param file_name: CSV file path\n", @@ -251,9 +244,9 @@ "\n", "file_path = 'task1.csv'\n", "conversion_flags = [True, True, True, True, True, True, True, True, True, True]\n", - "data_dict = read_all_csv_data(file_path, conversion_flags)\n", + "all_csv_data = read_all_csv_data(file_path, conversion_flags)\n", "\n", - "print(data_dict)" + "print(all_csv_data)" ] }, { @@ -287,7 +280,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 32, "outputs": [ { "name": "stdout", @@ -309,10 +302,10 @@ " for i, val in enumerate(sorted(lst), 1):\n", " ranks.setdefault(val, []).append(i)\n", "\n", + " # get the average rank of each item using dictionary comprehension\n", " avg_ranks = {v: sum(r) / len(r) for v, r in ranks.items()}\n", "\n", - " ranked_list = [avg_ranks[val] for val in lst]\n", - " return ranked_list\n", + " return [avg_ranks[val] for val in lst]\n", "\n", "\n", "def sm_correlation_coefficient(data1, data2):\n", @@ -332,16 +325,15 @@ " # get the sum of squared differences of each item in both lists\n", " sum_sqr_diff = 0\n", " for i in range(len(ranked_data1)):\n", - "\n", " diff = ranked_data1[i] - ranked_data2[i] # differences of each list item\n", " sqr = diff ** 2 # squared differences\n", " sum_sqr_diff += sqr # get the sum of each squared differences\n", "\n", " # Calculate the Spear-man's Rank Correlation Coefficient using the formula\n", " n = len(data1)\n", - " correlation = 1 - (6 * sum_sqr_diff) / (n * (n ** 2 - 1))\n", + " correlation_coefficient = 1 - (6 * sum_sqr_diff) / (n * (n ** 2 - 1))\n", "\n", - " return correlation\n", + " return correlation_coefficient\n", "\n", "\n", "# Read two columns of data from the CSV file\n", @@ -371,7 +363,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 33, "metadata": { "deletable": false }, @@ -385,16 +377,16 @@ } ], "source": [ - "def generate_correlation_coefficients(csv_file_path):\n", + "def generate_all_correlation_coefficients(csv_file_path):\n", " # Read the data from the CSV file\n", - " con_flags = [True, True, True, True, True, True, True, True, True, True]\n", - " column_data = read_all_csv_data(csv_file_path, con_flags)\n", + " conversion_indicators = [True, True, True, True, True, True, True, True, True, True]\n", + " all_column_data = read_all_csv_data(csv_file_path, conversion_indicators)\n", "\n", " # Get the column names\n", - " column_names = list(column_data.keys())\n", + " column_names = list(all_column_data.keys())\n", "\n", " # Initialize an empty list to store the correlation coefficients\n", - " correlation_coefficients = []\n", + " all_correlation_coefficients = []\n", "\n", " # Iterate through all pairs of columns\n", " for i in range(len(column_names)):\n", @@ -403,23 +395,25 @@ " continue # skip the comparison of the same column with itself\n", "\n", " col1_name = column_names[i]\n", - " col1_data = column_data[col1_name]\n", + " col1_data = all_column_data[col1_name]\n", "\n", " col2_name = column_names[j]\n", - " col2_data = column_data[col2_name]\n", + " col2_data = all_column_data[col2_name]\n", "\n", " # Calculate the Spearman's Rank Correlation Coefficient for the current pair of columns\n", " try:\n", - " coefficient = sm_correlation_coefficient(col1_data, col2_data)\n", - " correlation_coefficients.append((col1_name, col2_name, coefficient))\n", + " sm_cor_coefficient = sm_correlation_coefficient(col1_data, col2_data)\n", + " all_correlation_coefficients.append((col1_name, col2_name, sm_cor_coefficient))\n", " except ValueError as e:\n", - " print(f\"Error occured: {e}\")\n", + " print(f\"A value error occurred: {e}\")\n", + " except Exception as ex:\n", + " print(f\"An error occurred: {ex}\")\n", "\n", - " return correlation_coefficients\n", + " return all_correlation_coefficients\n", "\n", "# Test the generate_correlation_coefficients function\n", "file_path = 'task1.csv'\n", - "result = generate_correlation_coefficients(file_path)\n", + "result = generate_all_correlation_coefficients(file_path)\n", "print(result)\n" ] }, @@ -444,15 +438,97 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 187, "metadata": { "deletable": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n", + " $ age $ pop $ share_white $ share_black $\n", + "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n", + "$ age $ - $ 0.0084 $ 0.2207 $ -0.1034 $\n", + "$ pop $ 0.0084 $ - $ 0.0767 $ -0.1339 $\n", + "$ share_white $ 0.2207 $ 0.0767 $ - $ -0.4917 $\n", + "$ share_black $ -0.1034 $ -0.1339 $ -0.4917 $ - $\n", + "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n" + ] + } + ], "source": [ - "# replace with your code" + "def print_table(correlations, border_char, columns):\n", + " # Find the length of the longest string in the list and add constant to define cell length\n", + " longest_string_length = len(max(columns, key=len))\n", + " max_cell_length = longest_string_length + 5\n", + "\n", + " # print top boarder\n", + " # Initialize a variable to hold the sum of string lengths starts with 1 to hold the cell gap.\n", + " sum_col_lengths = 1\n", + "\n", + " # Loop over each string in the list and add its length to the running total\n", + " for name in columns:\n", + " sum_col_lengths += len(name)\n", + "\n", + " sum_col_lengths += len(columns) * 7\n", + " print(\" \" * max_cell_length + \"$\" * sum_col_lengths)\n", + "\n", + " # print the column names\n", + " print(\" \" * max_cell_length, end=\"\")\n", + " for name in columns:\n", + " print(border_char + \" \" * 3 + name + \" \" * 3 , end=\"\")\n", + " print(\"$\")\n", + "\n", + " # print header separator\n", + " print(\"$\" * (max_cell_length + sum_col_lengths))\n", + "\n", + " # print the correlation coefficients\n", + " for i, value in enumerate(columns):\n", + " string_len_diff = longest_string_length - len(value) + 5\n", + " space_count = int(string_len_diff / 2)\n", + "\n", + " if len(value) % 2 == 0:\n", + " print(border_char + space_count * \" \" + value + (space_count - 1) * \" \", end=\"\")\n", + " else:\n", + " print(border_char + space_count * \" \" + value + space_count * \" \", end=\"\")\n", + "\n", + " for col_name in columns:\n", + " if value == col_name:\n", + " cell_space = int(((len(col_name) + 7 ) / 2) -1) * \" \"\n", + " print(border_char + cell_space + \"-\" + cell_space, end=\"\")\n", + "\n", + " else:\n", + " for element in correlations:\n", + " if element[0] == value and element[1] == col_name:\n", + " correlation_coefficient = round(element[2], 4)\n", + " cell_space = (len(col_name)) // 2 * \" \"\n", + " if correlation_coefficient > 0:\n", + " print(border_char + \" \" +cell_space + str(correlation_coefficient) + cell_space, end=\"\")\n", + " else:\n", + " print(border_char +cell_space + str(correlation_coefficient) + cell_space, end=\"\")\n", + "\n", + "\n", + " print(\"$\")\n", + "\n", + " # print footer\n", + " # print header separator\n", + " print(\"$\" * (max_cell_length + sum_col_lengths))\n", + "\n", + "\n", + "file_path = 'task1.csv'\n", + "columns_to_include = ['age','pop', 'share_white', 'share_black']\n", + "print_table(generate_all_correlation_coefficients(file_path), '$', columns_to_include)" ] }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "collapsed": false + } + }, { "cell_type": "markdown", "metadata": { -- GitLab