From 3818c651f5b452e49b1e8ca186289a9f919b7435 Mon Sep 17 00:00:00 2001
From: amilashanaka <dsa.amilashanaka@gmail.com>
Date: Wed, 28 Dec 2022 22:52:36 +0000
Subject: [PATCH] api

---
 api/run.py | 354 ++++++++++++++++++++----------------------
 1 file changed, 132 insertions(+), 222 deletions(-)

diff --git a/api/run.py b/api/run.py
index 90a5b15..1925bd3 100644
--- a/api/run.py
+++ b/api/run.py
@@ -1,296 +1,206 @@
-# import libraries
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-from collections import Counter
-import statistics
-import math
-#for LSTM model
-from sklearn.preprocessing import MinMaxScaler
-from keras.models import Sequential
-from keras.layers import Dense, LSTM, Dropout
-
-# ignore warnings
-import warnings
-warnings.filterwarnings("ignore")
-
-from datetime import datetime as dt
-
-import pymysql
-import json
 from app import app
+import pandas as pd
+import datetime as dt
+import numpy as np
-from flask import Flask, jsonify, request, make_response
-from flask import flash, request
+from sqlalchemy import create_engine
-import datetime
-from werkzeug.security import generate_password_hash, check_password_hash
-from functools import wraps
-import requests
-import numpy as np
-import pandas as pd
-from sklearn import linear_model
-from datetime import datetime, timedelta
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow import keras
+from keras.models import Sequential
+from keras.layers import Dense, LSTM, Dropout
-from statsmodels.tsa.ar_model import AR
-from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
 import matplotlib.pyplot as plt
-plt.rcParams.update({'figure.figsize':(9,7), 'figure.dpi':120})
-import pmdarima as pm
-from sqlalchemy import create_engine
+# Globally declared data sets and model state
-# globally declare dataset
 data_set =pd.DataFrame()
-# Tranning dataset with
+tranning_data_set = pd.DataFrame()
-tranning_data_set=pd.DataFrame()
+test_result = pd.DataFrame()
-# Validation Data Set
+data_arr_x = np.array([])
+data_arr_y = np.array([])
-validate_data_set=pd.DataFrame()
+x_train = np.array([])
+y_train = np.array([])
+x_test = np.array([])
+y_test = np.array([])
-# result data set
+test_predict = np.array([])
-result_data_set_tranning=pd.DataFrame()
+train_ind = 0
-result_data_set_forecast=pd.DataFrame()
-
-# Lstm model
-model = Sequential()
 scaler = MinMaxScaler()
-train_close_len=0
-# Support functions
+num_steps = 60
-def read_from_db():
-
+def read_data_set():
+    # Declare the globals this function populates
     global data_set
+    global tranning_data_set
+
+    # Define the database connection
     db_connection_str = 'mysql+pymysql://root:@localhost/csct'
     db_connection = create_engine(db_connection_str)
-    df = pd.read_sql('SELECT * FROM product__demnd', con=db_connection)
-    data_set=df
-    return df
+    # Read the demand table into a DataFrame
+    data_set = pd.read_sql('SELECT * FROM product__demnd', con=db_connection)
+    # Normalise the Date column and sort chronologically
+    data_set['Date'] = pd.to_datetime(data_set['Date']).dt.date
+    data_set.sort_values('Date', inplace=True)
+    data_set['Date'] = data_set['Date'].astype(str)
+
-def shape_input():
-    global tranning_data_set
-    global train_close_len
-    global scaler
-    # Create new data with only the "OrderDemand" column
-    orderD = tranning_data_set.filter(["OrderDemand"])
-    # Convert the dataframe to a np array
-    orderD_array = orderD.values
-    # See the train data len
-    train_close_len = math.ceil(len(orderD_array) * 0.8)
-    print(train_close_len)
-    # Normalize the data
+
+
-    scaled_data = scaler.fit_transform(orderD_array)
-    # Create the training dataset
-    train_data = scaled_data[0 : train_close_len, :]
-    # Create X_train and y_train
-    X_train = []
-    y_train = []
-    for i in range(60, len(train_data)):
-        X_train.append(train_data[i - 60 : i, 0])
-        y_train.append(train_data[i, 0])
-
-
-
-    return X_train, y_train,scaled_data,orderD
+    # Clean Order_Demand: strip the parentheses and cast to int64
+    data_set['Order_Demand'] = data_set['Order_Demand'].str.replace('(', '', regex=False)
+    data_set['Order_Demand'] = data_set['Order_Demand'].str.replace(')', '', regex=False)
+    data_set['Order_Demand'] = data_set['Order_Demand'].astype('int64')
+    # Aggregate total demand per day for training
+    tranning_data_set = data_set.groupby('Date')['Order_Demand'].sum().reset_index()
+    # Index by Date and drop rows with missing values
+    data_set = data_set.set_index(data_set['Date'])
+    data_set.dropna(inplace=True)
 
-@app.route('/setup',methods=['POST'])
-def start():
+    return data_set
+
+def reshape():
     global data_set
+    global data_arr_x
+    global data_arr_y
+    global num_steps
     global tranning_data_set
-    global result_data_set_tranning
-    global result_data_set_forecast
-    global train_close_len
     global scaler
-    global model
-
-
-
-    df=read_from_db()
-    data_set
-
-    data_set.rename(columns = {'Product_Code': 'ProductCode',
-                'Product_Category': 'ProductCategory',
-                'Order_Demand': 'OrderDemand'}, inplace = True)
-
-
-
-    data_set.dropna(inplace=True)
-
-    data_set.sort_values('Date', ignore_index=True, inplace=True)
+    # Scale the daily demand into the [0, 1] range
+    items = tranning_data_set.filter(['Order_Demand'])
+    item_arr = np.array(items.values)
+    scaled_data = scaler.fit_transform(item_arr)
+    print(scaled_data)
-    data_set['OrderDemand'] = data_set['OrderDemand'].str.replace('(',"")
-    data_set['OrderDemand'] = data_set['OrderDemand'].str.replace(')',"")
-    data_set['OrderDemand'] = data_set['OrderDemand'].astype('int64')
-
-    #Forecast the Order Demand with LSTM Model
+
+    # Build sliding windows of num_steps observations
+    data_arr_x, data_arr_y = lstm_data_transform(scaled_data, scaled_data, num_steps=num_steps)
+    print("The new shape of x is", data_arr_x.shape)
-    df = data_set[(data_set['Date']>='2012-01-01') & (data_set['Date']<='2016-12-31')].sort_values('Date', ascending=True)
-    df = df.groupby('Date')['OrderDemand'].sum().reset_index()
-    tranning_data_set=df
-    X_train,y_train,scaled_data,orderD=shape_input()
-    # make X_train and y_train np array
-    X_train, y_train = np.array(X_train), np.array(y_train)
+    # Sanity check: the inverse transform should recover the original values
+    original_data = scaler.inverse_transform(scaled_data)
+    print(original_data)
-    # reshape the data
-    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
-
+def split_data():
+    global data_arr_x
+    global data_arr_y
+    global num_steps
-    # create the testing dataset
-    test_data = scaled_data[train_close_len - 60 : , :]
-    # create X_test and y_test
-    X_test = []
-    y_test = df.iloc[train_close_len : , :]
-    for i in range(60, len(test_data)):
-        X_test.append(test_data[i - 60 : i, 0])
+    global x_train
+    global y_train
-    # convert the test data to a np array and reshape the test data
-    X_test = np.array(X_test)
-    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
+    global x_test
+    global y_test
+    global train_ind
-    model.add(LSTM(units=512, return_sequences=True, activation='relu', input_shape=(X_train.shape[1], 1)))
+    # 80/20 chronological train/test split
+    train_ind = int(0.8 * len(data_arr_x))
+    x_train = data_arr_x[:train_ind]
+    y_train = data_arr_y[:train_ind]
+    x_test = data_arr_x[train_ind:]
+    y_test = data_arr_y[train_ind:]
-    model.add(LSTM(units=256, activation='relu', return_sequences=False))
+def lstmmodel():
-
-    model.add(Dense(128))
-    model.add(Dense(64))
-
-    model.add(Dense(32))
+    global test_predict
-    model.add(Dense(1))
+    # Single LSTM layer followed by a dense head
+    model = Sequential()
+    model.add(LSTM(512, activation='relu', input_shape=(num_steps, 1),
+             return_sequences=False))
+    model.add(Dense(units=256, activation='relu'))
+
-    # compile the LSTM model
+    # relu on the output is acceptable here because the MinMax-scaled target is non-negative
+    model.add(Dense(units=1, activation='relu'))
+
+    model.compile(optimizer="Adam", loss="mean_squared_error", metrics=['mae'])
+    model.fit(x_train, y_train, epochs=25)
-    # train the LSTM model
-    model.fit(X_train, y_train,
-             epochs=5,
-             batch_size=32,
-             verbose=1)
-
-
-    # predict with LSTM model
-    predictions = model.predict(X_test)
-    predictions = scaler.inverse_transform(predictions)
-
-    valid = orderD[train_close_len:]
-    valid["Predictions"] = predictions
-
-    result_data_set_tranning=df[:train_close_len]
-    result_data_set_forecast=df[train_close_len:]
-    result_data_set_forecast["Predictions"] = predictions
-
-
-    return "Setting up completed"
-
-
-start()
-
-@app.route("/result_tranning",methods=['GET'])
-def result_tranning():
-    global result_data_set_tranning
-
-
-    result_data_set_tranning=result_data_set_tranning.set_index(result_data_set_tranning['Date'])
-
-
+    # Predict on the held-out windows
+    test_predict = model.predict(x_test)
-    return result_data_set_tranning.to_json(orient='records')
+def plot_result():
-@app.route('/result_validate',methods=['GET'])
-def result_validate():
-    global result_data_set_forecast
-    result_data_set_forecast=result_data_set_forecast.set_index(result_data_set_forecast['Date'])
-    return result_data_set_forecast.to_json(orient='records')
-
-
-@app.route("/plot",methods=['GET'])
-def plot():
-    global result_data_set_tranning
-    global result_data_set_forecast
-
-    #visualize the data
-    plt.figure(figsize=(16, 8))
-    plt.title("Forecast with LSTM Model")
-    plt.xlabel("Time", fontsize=14)
-    plt.ylabel("Order Demand", fontsize=14)
-    plt.plot(result_data_set_tranning["Date"], result_data_set_tranning["OrderDemand"])
-    plt.plot(result_data_set_forecast["Date"], result_data_set_forecast["OrderDemand"], result_data_set_forecast["Predictions"])
-    plt.legend(["Train", "Validation", "Predictions"], loc="lower right")
+    # Compare true and predicted values on the test set
+    plt.style.use('ggplot')
+    plt.figure(figsize=(20, 7))
+    plt.plot(y_test, label="True value")
+    plt.plot(test_predict.ravel(), label="Predicted value")
+    plt.legend()
     plt.show()
-    return "plot"
-
-@app.route("/forecast",methods=["POST"])
-def forecast():
-    # read incomming json data
-    data=request.get_json()
-    print(data)
-    return "forecast"
-
-@app.route("/category",methods=['GET'])
-
-def category():
+def lstm_data_transform(x_data, y_data, num_steps=5):
+    """Transform a series into (samples, num_steps, features) windows
+    for LSTM training with a sliding-window approach."""
+    # Prepare the lists for the transformed data
+    X, y = list(), list()
+    # Loop over the entire data set
+    for i in range(x_data.shape[0]):
+        # compute a new (sliding window) index
+        end_ix = i + num_steps
+        # stop once the window would run past the end of the data
+        if end_ix >= x_data.shape[0]:
+            break
+        # Get a sequence of data for x
+        seq_X = x_data[i:end_ix]
+        # Get only the element that follows the window for y
+        seq_y = y_data[end_ix]
+        # Append the sequences to the lists
+        X.append(seq_X)
+        y.append(seq_y)
+    # Make the final arrays
+    x_array = np.array(X)
+    y_array = np.array(y)
+    return x_array, y_array
+
+@app.route('/start',methods=['GET'])
+def start():
     global data_set
-    data=data_set['ProductCategory'].value_counts()
-    return data.to_json()
-@app.route("/warehouse",methods=['GET'])
-def warehouse():
-    global data_set
-    data=data_set['Warehouse'].value_counts()
-    return data.to_json()
+    # DataFrame.info() prints its summary and returns None, so call it directly
+    data_set.info()
+    return data_set.to_json(orient='records')
-@app.route("/by_year",methods=['GET']) +@app.route("/result_tranning",methods=['GET']) +def result_tranning(): + global test_result -def by_year(): - global data_set - df = data_set[['OrderDemand', 'Year']].groupby(["Year"]).sum().reset_index().sort_values(by='Year', ascending=False) - return df.to_json() + test_len=len(tranning_data_set)-len(test_predict) + test_result=tranning_data_set[test_len:] + test_result['Predictions']=test_predict + test_result['OrderDemand']=y_test + # test_result=test_result.set_index(test_result['Date']) + return test_result.to_json(orient='records') -@app.route("/monthly",methods=['GET']) -def monthly(): - global data_set - temp_data = data_set.copy() - temp_data.Month.replace([1,2,3,4,5,6,7,8,9,10,11,12], ['Jan', 'Feb', 'Mar', 'Apr', 'May', - 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], inplace=True) - df = temp_data[['OrderDemand', - 'Month', 'Year',]].groupby(["Year", - "Month"]).sum().reset_index().sort_values(by=['Year', - 'Month'], ascending=False) - df=df.T - return df.to_json() +read_data_set() +reshape() +split_data() +lstmmodel() +plot_result() + if __name__ == "__main__": app.run() \ No newline at end of file -- GitLab