Skip to content
Snippets Groups Projects
Commit 01887636 authored by UWE_ 23086369_2023's avatar UWE_ 23086369_2023
Browse files

task1.csv

parent e1971908
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# UFCFVQ-15-M Programming for Data Science
# Programming Task 1
## Student Id:
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Requirement FR1.1 - Find the arithmetic mean using a function
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Sample data set for the summary statistics: the integers 0-29 in
# shuffled order.
numbers_list = [
    29, 17, 28, 6, 14, 7, 4, 27, 21, 15,
    10, 16, 24, 26, 3, 11, 13, 8, 23, 9,
    0, 22, 12, 2, 18, 19, 5, 1, 20, 25,
]
def arithmetic_mean(numbers):
    """Return the arithmetic mean (average) of ``numbers``.

    Args:
        numbers: A non-empty sequence of numeric values.

    Returns:
        The sum of the values divided by how many values there are.

    Raises:
        ZeroDivisionError: If ``numbers`` is empty.
    """
    return sum(numbers) / len(numbers)
# Execute the function and save the resulting value.
mean_value = arithmetic_mean(numbers_list)

# Display the average value (mean).
print(f"The mean value is: {mean_value}")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
##### MARK: ##### MARK:
#### FEEDBACK: #### FEEDBACK:
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Requirement FR1.2 - Find the standard deviation using a function
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
def find_std(numbers, mean):
    """Return the population standard deviation of ``numbers``.

    The variance is the average of the squared differences from ``mean``
    (dividing by ``len(numbers)``, not ``len(numbers) - 1``); the standard
    deviation is its square root.

    Args:
        numbers: A non-empty sequence of numeric values.
        mean: The value the deviations are measured from.

    Returns:
        The square root of the mean squared deviation.

    Raises:
        ZeroDivisionError: If ``numbers`` is empty.
    """
    variance = sum((x - mean) ** 2 for x in numbers) / len(numbers)
    return variance ** 0.5
# Determine the mean of numbers_list using the function defined earlier.
mean_value = arithmetic_mean(numbers_list)

# Calculate the standard deviation from the values and their mean.
std_value = find_std(numbers_list, mean_value)

# Print the calculated standard deviation.
print(f"The standard deviation is: {std_value}")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
##### MARK: ##### MARK:
#### FEEDBACK: #### FEEDBACK:
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Requirement FR1.3 - Find the min/max values using functions
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Sample data set for the summary statistics: the integers 0-29 in
# shuffled order.
numbers_list = [
    29, 17, 28, 6, 14, 7, 4, 27, 21, 15,
    10, 16, 24, 26, 3, 11, 13, 8, 23, 9,
    0, 22, 12, 2, 18, 19, 5, 1, 20, 25,
]
def min_value(numbers):
    """Return the smallest value in ``numbers``.

    Args:
        numbers: A non-empty iterable of mutually comparable values.

    Returns:
        The minimum element, as found by the built-in ``min``.

    Raises:
        ValueError: If ``numbers`` is empty.
    """
    return min(numbers)
def max_value(numbers):
    """Return the largest value in ``numbers``.

    Args:
        numbers: A non-empty iterable of mutually comparable values.

    Returns:
        The maximum element, as found by the built-in ``max``.

    Raises:
        ValueError: If ``numbers`` is empty.
    """
    return max(numbers)
# Find the min/max values within the list.
The_min_value = min_value(numbers_list)
The_max_value = max_value(numbers_list)

# Display the result.
print(f"The minimum value is: {The_min_value}")
print(f"The maximum value is: {The_max_value}")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
##### MARK: ##### MARK:
#### FEEDBACK: #### FEEDBACK:
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Requirement FR1.4 - Find the 25th percentile using functions
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Sample data set for the summary statistics: the integers 0-29 in
# shuffled order.
numbers_list = [
    29, 17, 28, 6, 14, 7, 4, 27, 21, 15,
    10, 16, 24, 26, 3, 11, 13, 8, 23, 9,
    0, 22, 12, 2, 18, 19, 5, 1, 20, 25,
]
def calculate_percentile(numbers, percentile):
    """Return the given percentile of ``numbers`` using linear interpolation.

    The values are sorted and the percentile is mapped to the fractional
    position ``(n - 1) * percentile / 100``. When that position falls
    between two elements the result is interpolated between them, so the
    50th percentile of an even-sized list is the mean of the two middle
    values rather than the lower one.

    Args:
        numbers: A non-empty iterable of numeric values.
        percentile: The percentile to compute, between 0 and 100 inclusive.

    Returns:
        The element at the percentile position when it lands exactly on
        one, otherwise the interpolated value between its two neighbours.

    Raises:
        ValueError: If ``percentile`` is outside 0-100 or ``numbers`` is
            empty.
    """
    # A negative percentile would otherwise index from the end of the list
    # and silently return an unrelated value.
    if not 0 <= percentile <= 100:
        raise ValueError("percentile must be between 0 and 100")

    sorted_list = sorted(numbers)
    if not sorted_list:
        raise ValueError("numbers must not be empty")

    position = (len(sorted_list) - 1) * (percentile / 100.0)
    lower_index = int(position)
    fraction = position - lower_index

    # Exact hit: return the element itself so its original type is kept.
    if fraction == 0:
        return sorted_list[lower_index]

    lower = sorted_list[lower_index]
    upper = sorted_list[lower_index + 1]
    return lower + (upper - lower) * fraction
# Calculate the 25th percentile of the list.
percentile_25 = calculate_percentile(numbers_list, 25)

# Display the 25th percentile value.
print(f"The 25th percentile is: {percentile_25}")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
##### MARK: ##### MARK:
#### FEEDBACK: #### FEEDBACK:
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Requirement FR1.5 - Find the 50th percentile using functions
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Sample data set for the summary statistics: the integers 0-29 in
# shuffled order.
numbers_list = [
    29, 17, 28, 6, 14, 7, 4, 27, 21, 15,
    10, 16, 24, 26, 3, 11, 13, 8, 23, 9,
    0, 22, 12, 2, 18, 19, 5, 1, 20, 25,
]
def calculate_percentile(numbers, percentile):
    """Return the given percentile of ``numbers`` using linear interpolation.

    The values are sorted and the percentile is mapped to the fractional
    position ``(n - 1) * percentile / 100``. When that position falls
    between two elements the result is interpolated between them, so the
    50th percentile of an even-sized list is the mean of the two middle
    values rather than the lower one.

    Args:
        numbers: A non-empty iterable of numeric values.
        percentile: The percentile to compute, between 0 and 100 inclusive.

    Returns:
        The element at the percentile position when it lands exactly on
        one, otherwise the interpolated value between its two neighbours.

    Raises:
        ValueError: If ``percentile`` is outside 0-100 or ``numbers`` is
            empty.
    """
    # A negative percentile would otherwise index from the end of the list
    # and silently return an unrelated value.
    if not 0 <= percentile <= 100:
        raise ValueError("percentile must be between 0 and 100")

    sorted_list = sorted(numbers)
    if not sorted_list:
        raise ValueError("numbers must not be empty")

    position = (len(sorted_list) - 1) * (percentile / 100.0)
    lower_index = int(position)
    fraction = position - lower_index

    # Exact hit: return the element itself so its original type is kept.
    if fraction == 0:
        return sorted_list[lower_index]

    lower = sorted_list[lower_index]
    upper = sorted_list[lower_index + 1]
    return lower + (upper - lower) * fraction
# Calculate the 50th percentile (the median) with calculate_percentile.
percentile_50 = calculate_percentile(numbers_list, 50)
print(f"The 50th percentile is: {percentile_50}")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
##### MARK: ##### MARK:
#### FEEDBACK: #### FEEDBACK:
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Requirement FR1.6 - Find the 75th percentile using functions
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Sample data set for the summary statistics: the integers 0-29 in
# shuffled order.
numbers_list = [
    29, 17, 28, 6, 14, 7, 4, 27, 21, 15,
    10, 16, 24, 26, 3, 11, 13, 8, 23, 9,
    0, 22, 12, 2, 18, 19, 5, 1, 20, 25,
]
def calculate_percentile(numbers, percentile):
    """Return the given percentile of ``numbers`` using linear interpolation.

    The values are sorted and the percentile is mapped to the fractional
    position ``(n - 1) * percentile / 100``. When that position falls
    between two elements the result is interpolated between them, so the
    50th percentile of an even-sized list is the mean of the two middle
    values rather than the lower one.

    Args:
        numbers: A non-empty iterable of numeric values.
        percentile: The percentile to compute, between 0 and 100 inclusive.

    Returns:
        The element at the percentile position when it lands exactly on
        one, otherwise the interpolated value between its two neighbours.

    Raises:
        ValueError: If ``percentile`` is outside 0-100 or ``numbers`` is
            empty.
    """
    # A negative percentile would otherwise index from the end of the list
    # and silently return an unrelated value.
    if not 0 <= percentile <= 100:
        raise ValueError("percentile must be between 0 and 100")

    sorted_list = sorted(numbers)
    if not sorted_list:
        raise ValueError("numbers must not be empty")

    position = (len(sorted_list) - 1) * (percentile / 100.0)
    lower_index = int(position)
    fraction = position - lower_index

    # Exact hit: return the element itself so its original type is kept.
    if fraction == 0:
        return sorted_list[lower_index]

    lower = sorted_list[lower_index]
    upper = sorted_list[lower_index + 1]
    return lower + (upper - lower) * fraction
# Calculate the 75th percentile with calculate_percentile.
percentile_75 = calculate_percentile(numbers_list, 75)
print(f"The 75th percentile is: {percentile_75}")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
##### MARK: ##### MARK:
#### FEEDBACK: #### FEEDBACK:
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Requirement FR1.7 - Print the set of summary statistics in tabular form
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Summary table of the statistics: mean, standard deviation, minimum and
# maximum values, and the 25th/50th/75th percentiles.
# Labels are left-aligned in 15 columns, values right-aligned in 10.
header = f"{'Statistic':<15}{'Value':>10}"
separator = '-' * len(header)

print(header)
print(separator)
print(f"{'Mean':<15}{mean_value:>10.2f}")
print(f"{'STD':<15}{std_value:>10.2f}")
print(f"{'Min':<15}{The_min_value:>10}")
print(f"{'Max':<15}{The_max_value:>10}")
print(f"{'25%':<15}{percentile_25:>10}")
print(f"{'50%':<15}{percentile_50:>10}")
print(f"{'75%':<15}{percentile_75:>10}")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
##### MARK: ##### MARK:
#### FEEDBACK: #### FEEDBACK:
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Requirement FR1.8 - Read data from a file into memory
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# The file path
file_pathname = '/Users/mscdatascience/Documents/assignment-PDS/mohammad_alsuulaimani_uwe_23086369_2023/task.dat'

# Read the file and store each line, stripped of surrounding whitespace,
# as one element of the list.
with open(file_pathname, 'r') as file:
    data_values = [line.strip() for line in file]

# Display the list of data values.
print(data_values)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
##### MARK: ##### MARK:
#### FEEDBACK: #### FEEDBACK:
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Requirement FR1.9 - Design and build a data structure to store CSV data in memory
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Read the CSV file into a dictionary that maps each column name (taken
# from the first line) to the list of that column's values, kept as strings.
csv_data = {}
with open('/Users/mscdatascience/Documents/assignment-PDS/mohammad_alsuulaimani_uwe_23086369_2023/task1.csv', 'r') as file:
    # The first line holds the comma-separated column names.
    header = file.readline().strip().split(',')
    for column in header:
        csv_data[column] = []

    # Every remaining line is a record; field i belongs to column header[i].
    for line in file:
        values = line.strip().split(',')
        for i, value in enumerate(values):
            csv_data[header[i]].append(value)

# To confirm, display the first few items from every list in the dictionary.
{key: values[:3] for key, values in csv_data.items()}
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
##### MARK: ##### MARK:
#### FEEDBACK: #### FEEDBACK:
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Requirement FR1.10 - Create a generic statistical summary function
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# add code here # add code here
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
##### MARK: ##### MARK:
#### FEEDBACK: #### FEEDBACK:
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Requirement FR1.11 - Adherence to good coding standards
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
##### MARK: ##### MARK:
#### FEEDBACK: #### FEEDBACK:
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Requirement FR1.12 - Process Development report
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# write here # write here
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
##### MARK: ##### MARK:
#### FEEDBACK: #### FEEDBACK:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment