From f2de756cfd6882b5607faa558ea7554856f57f7d Mon Sep 17 00:00:00 2001
From: am2-liyanaarac <akash2.liyanaarachchi@live.uwe.ac.uk>
Date: Wed, 5 Apr 2023 12:19:38 +0100
Subject: [PATCH] Reflective report done and add small changes to all the
 functions

---
 UFCFVQ-15-M_Python_Programming_Template.ipynb | 114 ++++++++++++------
 1 file changed, 74 insertions(+), 40 deletions(-)

diff --git a/UFCFVQ-15-M_Python_Programming_Template.ipynb b/UFCFVQ-15-M_Python_Programming_Template.ipynb
index bb72a2e..f955381 100644
--- a/UFCFVQ-15-M_Python_Programming_Template.ipynb
+++ b/UFCFVQ-15-M_Python_Programming_Template.ipynb
@@ -201,7 +201,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 190,
    "metadata": {
     "deletable": false
    },
@@ -238,8 +238,8 @@
     "                dict_data[col_name] = col_data\n",
     "\n",
     "            return dict_data\n",
-    "    except FileNotFoundError:\n",
-    "        print(f\"Error: File {file_name} not found.\")\n",
+    "    except (FileNotFoundError, Exception) as ex:\n",
+    "        print(f\"Error occurred: {ex}.\")\n",
     "        return None\n",
     "\n",
     "file_path = 'task1.csv'\n",
@@ -280,13 +280,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 194,
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Spear-man's Rank Correlation Coefficient for `age` and `pop`: 0.008388522728803749\n"
+      "Spear-man's Rank Correlation Coefficient for `pop` and `share_white`: 0.07665516130573191\n"
      ]
     }
    ],
@@ -295,8 +295,7 @@
     "    \"\"\"\n",
     "    This function returns a list of ranks for the input list, taking duplicates into account.\n",
     "    :param lst: A list of data that needed to be ranked\n",
-    "    :return list: A of ranked values\n",
-    "\n",
+    "    :return list: A list of ranked values\n",
     "    \"\"\"\n",
     "    ranks = {}\n",
     "    for i, val in enumerate(sorted(lst), 1):\n",
@@ -338,15 +337,15 @@
     "\n",
     "# Read two columns of data from the CSV file\n",
     "file_path = 'task1.csv'\n",
-    "column1_name, column1_data = read_csv_column(file_path, 0, True)\n",
-    "column2_name, column2_data = read_csv_column(file_path, 1, True)\n",
+    "column1_name, column1_data = read_csv_column(file_path, 1, True)\n",
+    "column2_name, column2_data = read_csv_column(file_path, 2, True)\n",
     "\n",
     "# Calculate the Spear-man's Rank Correlation Coefficient for the two columns\n",
     "try:\n",
     "    coefficient = sm_correlation_coefficient(column1_data, column2_data)\n",
     "    print(f\"Spear-man's Rank Correlation Coefficient for `{column1_name}` and `{column2_name}`: {coefficient}\")\n",
-    "except ValueError as e:\n",
-    "    print(f\"Error occured: {e}\")"
+    "except (ValueError, Exception) as e:\n",
+    "    print(f\"Error occurred: {e}\")"
    ],
    "metadata": {
     "collapsed": false
@@ -363,7 +362,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 189,
    "metadata": {
     "deletable": false
    },
@@ -377,10 +376,15 @@
     }
    ],
    "source": [
-    "def generate_all_correlation_coefficients(csv_file_path):\n",
+    "def generate_all_correlation_coefficients(csv_file):\n",
+    "    \"\"\"\n",
+    "    This function takes the csv file and generate all possible correlations with each and every column in the csv file\n",
+    "    :param csv_file: The CSV file path\n",
+    "    :return: A list of tuples containing all possible correlations with each and every column.\n",
+    "    \"\"\"\n",
     "    # Read the data from the CSV file\n",
-    "    conversion_indicators = [True, True, True, True, True, True, True, True, True, True]\n",
-    "    all_column_data = read_all_csv_data(csv_file_path, conversion_indicators)\n",
+    "    con_indicators = [True, True, True, True, True, True, True, True, True, True]\n",
+    "    all_column_data = read_all_csv_data(csv_file, con_indicators)\n",
     "\n",
     "    # Get the column names\n",
     "    column_names = list(all_column_data.keys())\n",
@@ -389,31 +393,30 @@
     "    all_correlation_coefficients = []\n",
     "\n",
     "    # Iterate through all pairs of columns\n",
-    "    for i in range(len(column_names)):\n",
-    "        for j in range(len(column_names)):\n",
-    "            if i == j:\n",
-    "                continue  # skip the comparison of the same column with itself\n",
+    "    for row_name in range(len(column_names)):\n",
+    "        for col_name in range(len(column_names)):\n",
+    "            # skip the comparison of the same column with itself\n",
+    "            if row_name == col_name:\n",
+    "                continue\n",
     "\n",
-    "            col1_name = column_names[i]\n",
-    "            col1_data = all_column_data[col1_name]\n",
+    "            column1_name = column_names[row_name]\n",
+    "            column1_data = all_column_data[column1_name]\n",
     "\n",
-    "            col2_name = column_names[j]\n",
-    "            col2_data = all_column_data[col2_name]\n",
+    "            column2_name = column_names[col_name]\n",
+    "            column2_data = all_column_data[column2_name]\n",
     "\n",
-    "            # Calculate the Spearman's Rank Correlation Coefficient for the current pair of columns\n",
+    "            # Calculate the Correlation Coefficient for the current pair of columns\n",
     "            try:\n",
-    "                sm_cor_coefficient = sm_correlation_coefficient(col1_data, col2_data)\n",
-    "                all_correlation_coefficients.append((col1_name, col2_name, sm_cor_coefficient))\n",
-    "            except ValueError as e:\n",
-    "                print(f\"A value error occurred: {e}\")\n",
-    "            except Exception as ex:\n",
+    "                sm_cor_coefficient = sm_correlation_coefficient(column1_data, column2_data)\n",
+    "                all_correlation_coefficients.append((column1_name, column2_name, sm_cor_coefficient))\n",
+    "            except (ValueError, Exception) as ex:\n",
     "                print(f\"An error occurred: {ex}\")\n",
     "\n",
     "    return all_correlation_coefficients\n",
     "\n",
-    "# Test the generate_correlation_coefficients function\n",
-    "file_path = 'task1.csv'\n",
-    "result = generate_all_correlation_coefficients(file_path)\n",
+    "# Test the function\n",
+    "file = 'task1.csv'\n",
+    "result = generate_all_correlation_coefficients(file)\n",
     "print(result)\n"
    ]
   },
@@ -438,7 +441,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 187,
+   "execution_count": 205,
    "metadata": {
     "deletable": false
    },
@@ -460,6 +463,11 @@
    ],
    "source": [
     "def print_table(correlations, border_char, columns):\n",
+    "    # check border character length\n",
+    "    if len(border_char) > 1 or not len(border_char):\n",
+    "         raise ValueError(\"Border character should be one single character\")\n",
+    "\n",
+    "\n",
     "    # Find the length of the longest string in the list and add constant to define cell length\n",
     "    longest_string_length = len(max(columns, key=len))\n",
     "    max_cell_length = longest_string_length + 5\n",
@@ -473,16 +481,16 @@
     "        sum_col_lengths += len(name)\n",
     "\n",
     "    sum_col_lengths += len(columns)  * 7\n",
-    "    print(\" \" * max_cell_length + \"$\" * sum_col_lengths)\n",
+    "    print(\" \" * max_cell_length + border_char * sum_col_lengths)\n",
     "\n",
     "    # print the column names\n",
     "    print(\" \" * max_cell_length, end=\"\")\n",
     "    for name in columns:\n",
     "        print(border_char + \" \" * 3 + name + \" \" * 3 , end=\"\")\n",
-    "    print(\"$\")\n",
+    "    print(border_char)\n",
     "\n",
     "    # print header separator\n",
-    "    print(\"$\" * (max_cell_length + sum_col_lengths))\n",
+    "    print(border_char * (max_cell_length + sum_col_lengths))\n",
     "\n",
     "    # print the correlation coefficients\n",
     "    for i, value in enumerate(columns):\n",
@@ -510,11 +518,10 @@
     "                            print(border_char +cell_space + str(correlation_coefficient) + cell_space, end=\"\")\n",
     "\n",
     "\n",
-    "        print(\"$\")\n",
+    "        print(border_char)\n",
     "\n",
     "    # print footer\n",
-    "    # print header separator\n",
-    "    print(\"$\" * (max_cell_length + sum_col_lengths))\n",
+    "    print(border_char * (max_cell_length + sum_col_lengths))\n",
     "\n",
     "\n",
     "file_path = 'task1.csv'\n",
@@ -563,7 +570,34 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "add markdown text here"
+    "\n",
+    "# Development Process Reflective Report\n",
+    "\n",
+    "## Problem Analysis\n",
+    "\n",
+    "Spearman's Rank Correlation Coefficient needed to be determined for all possible pairs of columns in a given dataset. The correlation coefficients also needed to be displayed in a special table, which required some more work. The objective was to create a set of procedures for doing these things.\n",
+    "\n",
+    "## Approach and Challenges\n",
+    "\n",
+    "As the first main task, I had to read a CSV file without using any library. I used Python's built-in file-reading functions to read through each line of the CSV file and applied the relevant logic to extract the requested column data. To convert values to numbers, I used exception handling, because if a value cannot be converted to a number, it should be kept as it is.\n",
+    "\n",
+    "Following that, I began the development process by writing code to determine the Spearman's Rank Correlation Coefficient between two data sets. A function to rank the data, taking into account duplicates, had to be developed initially in order to reduce code redundancy. Managing the duplicates and guaranteeing accurate rankings was the main difficulty and I had to check the ranking with MS Excel as well.\n",
+    "\n",
+    "Then, I moved on to creating a function that can generate a collection of correlation coefficients for a given dataset. This was accomplished by comparing every pair of columns in the dataset using the correlation coefficient function that had been constructed earlier. During this step, I ran into the difficulty of dealing with repeated and reversed pairs, which had to be addressed to prevent output redundancy.\n",
+    "\n",
+    "Subsequently, a custom table generation function was developed to display the correlation coefficients for a subset of column pairs. The table needed to be formatted with proper padding for readability. The primary challenge in this stage was ensuring the table's cell alignment and properly displaying the compared columns for better comprehension. I used pre-defined left and right padding in every column, based on the number of characters in the longest column name. After many attempts, I got the table to match the requirement. However, I have noticed that although the table works perfectly when the column names all have odd lengths, if column names of both even and odd lengths are present at the same time, the table's alignment gets slightly messed up.\n",
+    "\n",
+    "## Reflection\n",
+    "\n",
+    "The final solution used functions to efficiently calculate the Spearman's Rank Correlation Coefficient for every pair of columns in a dataset and present the results in a bespoke table. The project showed the need to modify code based on its output and to overcome problems in order to meet the output requirements.\n",
+    "\n",
+    "This taught me:\n",
+    "\n",
+    "1.  Create functions that are flexible and can be easily combined with others for more advanced tasks.\n",
+    "2.  Consider special cases, like dealing with repeated values in the ranking process and preventing unnecessary repetition in the output.\n",
+    "3.  Focus on proper formatting and clear presentation, as shown by the function that generates a custom table for displaying results.\n",
+    "\n",
+    "While solving complicated problems, iterative improvements, feedback-driven modifications, and readability and presentation are crucial."
    ]
   },
   {
-- 
GitLab