'''
Idea:

A training data set for anomaly detection exist. This data contains velocity (v), temperature (t) and pressure (p)
It is assumed that there is a linear relationship between v, t and p:

v = beta0 + beta1*t + beta2*p + error

From the training data we use least squares to estimate the beta-values and the standard deviation of the error
The training data is assumed to represent "normal operation"

Then we start collecting real-time data, also on the format [v,t,p]

For each new observation we calculate the deviation (residuals) between the estimated v 
from the regression model with the observed values of v
We issue an alert when 3 residuals in a row are larger than one standard deviation
'''
import matplotlib.pyplot as plt
import numpy as np
import csv
#
# Some utility functions are defined in the beginning...
#
def getCSV(csvName):  # Read data from csv-file, ignore first row, i.e., header
  first = True
  ret=[]
  with open(csvName, mode ='r')as file:
    csvFile = csv.reader(file)
    for lines in csvFile:
      if first:
        first = False
      else:
        row = []
        for el in lines:
          row.append(float(el))
        ret.append(row)
  return ret      

def extractColumns(A,cols,one=0):
    '''    
    Extract specific columns into a new matrix. <cols> = array of column numbers to extract
    # The flag <one=1> is used if the matrix should have the first column filled with "1"
    '''
    nRows = len(A)
    nCols = len(cols)
    ret = np.zeros([nRows,nCols+one])
    for row in range(nRows):
        rData = A[row]
        ret[row,0]=1 # In case of filling matrix with first column = 1
        i = 0
        for col in cols:
            ret[row,i+one] = rData[col]
            i += 1
    return ret

def extractColumn(A,col):
    '''    
    Extract one specific columns into a new vector
    '''
    nRows = len(A)   
    ret = np.zeros([nRows])
    for row in range(nRows):        
        ret[row]=A[row][col]
    return ret

def iif(a, ifTrue, ifFalse):  # Standard IIF-function, 
    if a:
        return ifTrue
    else:
        return ifFalse

def shift(vec,newVal=0):  # [x1,x2,x3,..,xn] ==> [x2,x3,..,xn,newVal]
    ret = vec[1:]    
    ret.append(newVal)
    return ret

trData = getCSV('NormalOperation.csv')     # Read training data from csv file
X = extractColumns(trData,[1,2],1)         # Create "Design matrix"
y = extractColumn(trData,0)                # Create "y-vector"
beta = np.linalg.lstsq(X,y, rcond=None)[0] # Regression analysis, i.e., Least Square
ss = 0 # square sum, used to calculate standard deviation, i.e., square root of "sum-of-squares"/"N - #of parameters" 
for i, x_i in enumerate(X):
    yPred = np.dot(beta,x_i)
    ss += (y[i]-yPred)**2
SD = np.sqrt(ss/(len(X)-len(beta)))
print("Regression parameters:",beta)
print("Standard error:",SD)
last3=[0,0,0] # Keep track of last 3 observations, 0 = within +/- 1 SD, 1 = outside
twData = getCSV('RunToFailure.csv')  # Read real-time data from csv file
twX = extractColumns(twData,[1,2],1) # Extract "Design matrix" from "digital twin"
twy = extractColumn(twData,0)        # Extract y-vector from "digital twin"
x=[]
y=[]
z=[]
print("\ni  Deviation Dev/St.error sum3 Alert")
print  ("** ********* ************ **** *****")
for i ,X in enumerate(twX):
    yPred = np.dot(beta,X)
    x.append(i)
    y.append(twy[i])
    z.append(yPred)
    residual =round(np.abs(twy[i]-yPred),6)
    last3 = shift(last3,iif(residual/SD>1,1,0)) 
    print(f"{i:<2}",f"{residual:<15}", f"{int(residual/SD):<7}",f"{sum(last3):<3}", iif(sum(last3)==3,"Alert","OK"))

plt.plot(x, y, 'g', label='v')
plt.plot(x, z, 'r', label='v-predicted')
plt.xlabel(r'$t$')
plt.title('Acual and predicted velocity')
plt.legend()
plt.show()








