diff --git a/UFCFVQ-15-M_Programming_Task_1_submit.ipynb b/UFCFVQ-15-M_Programming_Task_1_submit.ipynb index 801e6fab0097532bf987b8c663188054f572bef6..9d4d2e7392662aacc6c5d17cf070600db98f5175 100644 --- a/UFCFVQ-15-M_Programming_Task_1_submit.ipynb +++ b/UFCFVQ-15-M_Programming_Task_1_submit.ipynb @@ -486,76 +486,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### <b>`Pseudocode:`</b> \n", - "\n", - "\n", - "<font color = red> Questions: </font> \n", - "\n", - "* are there any limitations to character usage - probably not\n", - "* what is the source of the list of tuples? - probably doesn't matter - i.e. output from FR5 or variable storing list of tuples or list of tuples\n", - "* columns to include?? - not sure what this means yet\n", - "\n", - "* doesn't the list of tuples parameter determine which columns are included? \n", - " * may need to clarify this!\n", - "\n", - "- <b> TBD </b>\n", - "\n", - "<b>`Input:`</b> \n", - "\n", - "* list of tuples, e.g.\n", - " * var1, var1, pcc11\n", - " * var1, var2, pcc12\n", - " * var1, var3, pcc13\n", - " * var1, var4, pcc14\n", - " * var2, var3, pcc23\n", - " * var2, var4, pcc\n", - " * var3, var4, pcc\n", - " * var4, var4, pcc\n", - "\n", - "| | | | |\n", - "|----|-----|-----|-----|\n", - "| |var1 |var2 |var3 |\n", - "|var1|pcc11|pcc12|pcc13|\n", - "|var2|pcc21|pcc22|pcc23|\n", - "|var3|pcc31|pcc32|pcc33|\n", - "\n", - "tuple = [(var1, var1, pcc11), (var1, var2, pcc12)]\n", - "variable list = [var1, var2, var3]\n", - "\n", - "row header = iterate over variable list\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "<b>`Parameters:` </b> \n", - "* source of list of correlation coefficient tuples\n", - "* border character to use\n", - "* columns to include\n", - "\n", - "<b>`Output:`</b>\n", - "\n", - "* nicely printed ascii / dot matrix style table\n", - "* formatted\n", - "* padded / spaced\n", - "* legible\n", - "* footer??\n", - "* title??\n", - "\n", - "<b>`Steps: `</b>\n", - "\n", - "1. 
No idea, yet...read about string formatting, probably.\n", - "2. list of tuples\n", - " * get variables into list (for header row and first element of each data row)\n", - "3. enumerate over variable list for header row\n", - "4. maybe get data into the right format to iterate over rows\n", - "\n", - "first unique element of each tuple = column headers\n", - "iterate over elements 2, 3 for each tuple to print table\n", - "\n", + "DELETE this cell and the next cell before submitting\n", "\n", + "TODO\n", "\n", - "READ : https://docs.python.org/3/library/string.html#formatspec" + "Test with FR5 output\n" ] }, { @@ -564,323 +499,115 @@ "metadata": {}, "outputs": [], "source": [ - "#test tuple based on appendix example\n", + "#DELETE THIS CELL BEFORE SUBMISSION\n", + "# \n", + "# test tuple based on appendix example\n", "\n", "tup_list = [('Glucose', 'Glucose', 1), ('BP', 'Glucose', 0.1429), ('BMI', 'Glucose', 0.0584), ('Age', 'Glucose', 0.5328), ('Glucose','BP', 0.1429), ('BP', 'BP', 1), ('BMI', 'BP', -0.4522), ('Age', 'BP', 0.4194), ('Glucose', 'BMI', 0.0584), ('BP', 'BMI', -0.4522), ('BMI', 'BMI', 1), ('Age', 'BMI', -0.3847), ('Glucose', 'Age', 0.5328), ('BP', 'Age', 0.4194), ('BMI', 'Age', -0.3847), ('Age', 'Age', 1)]" ] }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 251, "metadata": {}, "outputs": [], "source": [ - "# step by step building top of table\n", - "\n", - "def table_printer(tup_list, columns = sorted(set([x[0] for x in tup_list])), pad_char = '*'):\n", + "def max_col_width(tup_list):\n", " '''\n", - " Function which takes a list of tuples, optional padding character, and columns (??) 
def max_col_width(tup_list):
    '''
    Return the length of the longest printed value found anywhere in a list of tuples.

    Every element of every tuple is converted with str() and measured; the
    single largest length is returned (one number for the whole table, not
    one width per column).

    Parameters:
        tup_list: list of tuples (or other iterables) of values to measure.

    Returns:
        int: the largest len(str(value)); 0 when tup_list is empty or
        contains only empty tuples (previously this raised ValueError).
    '''
    # default=0 covers the case where there is nothing to measure
    return max((len(str(value)) for row in tup_list for value in row), default=0)
def FR6_print_table(tup_list, *col_headers, pad_char = '*'):
    '''
    Print a bordered table of values taken from a list of (column, row, value) tuples.

    Parameters:
        tup_list: list of 3-tuples (column name, row name, value). The first
            tuple matching a column/row pair supplies that cell.
        *col_headers: names of the columns to include; the same names are used
            as row headers. If none are given, every unique first element in
            tup_list is used, in sorted order.
        pad_char: character used to draw the border (default '*'). It is
            repeated to build the borders, so a longer string makes the
            borders wider than the rows.

    Output:
        Prints the table followed by a caption. Returns None.
        A column/row pair with no matching tuple is shown as '-'.
    '''

    # if no column headers are provided, use all unique column headers from tup_list
    if not col_headers:
        col_headers = sorted(set([x[0] for x in tup_list]))
    # the table is square: rows use the same names as the columns
    row_headers = col_headers

    # cell width is three times the longest printed value in the data
    max_width = int(max_col_width(tup_list) * 3)

    # one border line: blank space over the row-header column, then enough
    # padding characters to cover every cell plus the separators around them
    border = ' ' * max_width + pad_char * (max_width * len(col_headers)) + pad_char * (len(col_headers) + 1)

    # top border, column headers, then the border under the headers
    table_str = border + '\n'
    table_str += ' ' * max_width
    for col in col_headers:
        table_str += f"{col:^{max_width+1}}"
    table_str += '\n'
    table_str += border + '\n'

    # one line per row header, one cell per column header
    for row in row_headers:
        table_str += f"{row:^{max_width}}" + pad_char

        for col in col_headers:
            # value (3rd element) of the first tuple for this column/row; '-' when there is none
            r_val = next((x[2] for x in tup_list if x[0] == col and x[1] == row), '-')

            # the '-' placeholder (or any other non-numeric value) cannot be
            # compared with 0; treat it like a value that needs no sign padding
            try:
                non_negative = r_val >= 0
            except TypeError:
                non_negative = False

            # non-negative values get a leading space where a minus sign would sit,
            # so positive and negative numbers line up
            if non_negative:
                table_str += f" {r_val:^{max_width-1}}" + pad_char
            else:
                table_str += f"{r_val:^{max_width}}" + pad_char

        table_str += '\n'

    # bottom border followed by a blank line
    table_str += border + '\n\n'

    # add caption for table
    table_str += ' ' * max_width + "Pearson's Correlation Coefficient for %s" % (col_headers,)

    # print table string
    print(table_str)
\u001b[0;36mprint_table\u001b[1;34m(data, padding)\u001b[0m\n\u001b[0;32m 38\u001b[0m \u001b[39mfor\u001b[39;00m row \u001b[39min\u001b[39;00m data[\u001b[39m1\u001b[39m:]:\n\u001b[0;32m 39\u001b[0m \u001b[39mif\u001b[39;00m col \u001b[39m<\u001b[39m \u001b[39mlen\u001b[39m(row):\n\u001b[1;32m---> 40\u001b[0m col_sum \u001b[39m+\u001b[39;49m\u001b[39m=\u001b[39;49m row[col]\n\u001b[0;32m 41\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mstr\u001b[39m(col_sum)\u001b[39m.\u001b[39mrjust(col_widths[col]), end\u001b[39m=\u001b[39mpadding)\n\u001b[0;32m 42\u001b[0m \u001b[39mprint\u001b[39m()\n", - "\u001b[1;31mTypeError\u001b[0m: unsupported operand type(s) for +=: 'int' and 'str'" + " +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", + " Age BMI BP \n", + " +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", + " Age + 1 + -0.3847 + 0.4194 +\n", + " BMI + -0.3847 + 1 + -0.4522 +\n", + " BP + 0.4194 + -0.4522 + 1 +\n", + " +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n", + "\n", + " Pearson's Correlation Coefficient for ('Age', 'BMI', 'BP')\n" ] } ], "source": [ - "print_table(tup_list,\"x\")" + "FR6_print_table(tup_list, 'Age', 'BMI', 'BP', pad_char = '+')\n" ] }, {