"""
Created on Tue Mar 17 19:27:58 2015
@author: Yury E. Garcia (yury@cimat.mx), CIMAT, Mexico.

This code compares the exponencial model and linear model with bayes factor
The main function is Function_fit, the user should:
load the file, give the value of R0 and the population size and the 
run length for alarm (eg. 3,4,...).

This is an implementation of the techniques presented in:

Yury E. Garcia, J. Andres Christen and Marcos A. Capistran (2015),
"A Bayesian Outbreak Detection Method for Influenza-Like Illness",
Journal of Biomedicine and Biothechnology (revision).

Please check the paper for all mathematical details and notation.

"""
import numpy as np
from scipy import special as spe

#Model's parameters
b1     =1.0/7.0     #recuperation rate
th01   =0.0         #theta_1
mu     =0           #Births and deaths
alpha0 =0.5         #alpha for the a priori

def par_est(outbreak,con,data_real):  
    """ This function makes the final theta estimation when there is an outbreak.
    
        outbreak  :It is generated in show fit (file: BayesFactorFigure) 
                   outbreak saves the positions where the exponential model was true in 
                   agreement with the run length for alarm
        data_real :It is the temporal data than we are analyzing
        con       :It is a variable given in Function_fit
        
        Returns the estimate.
    """
    con = con #con == 1 logged data
    
    if con==1:
        z =np.log(data_real[outbreak])  #Log the data at the outbreak 
    else:
        z     = []
        a     = outbreak[-1]            
        z     = np.log(data_real[a-3:a+1])
   #estimate theta (ie R0), the second entry of Xtheta  DR0
    n     = len(z)
    z     = np.array(z)
    X     = np.array([np.ones(n), (b1+mu)*np.linspace(0,n-1,n)])  #design matrix
    X_T_X = np.dot(X,X.T) 
    inv_X = np.dot(np.linalg.pinv(X_T_X),X)
    Theta = np.dot(inv_X,z)
    bet   =(b1+mu)*(Theta[1]+1.0) #infection rate beta = R0(b1+mu)
    return  bet, Theta  
 
 
def fact1(y, n, theta0, A01,design,beta0):
    """ This Fuction calculates the value of normalization constant p(D).
        It is called in Function_fit:
        y:      Vector with the three data that we are using to compare the models.
        n:      Sliding window (run length).
        theta0: Initial parameters (0 DeltaR0) exponential model, (0 0) linear model
        A01   : A priori covariance matrix.
        design: Design matrix.
        beta0 : beta0 of the prior distribution.
        
        Returns: The nomralization constant.
        """
    
    design_T_design = np.dot(design,design.T)
    A01_inv         = np.linalg.pinv(A01)
    An              = A01_inv + design_T_design
    An_inv          = np.linalg.pinv(An)
    alphan          = alpha0 + n/2.0        
    thetan          = np.dot(An_inv,(np.dot(A01_inv,theta0)+np.dot(design,y)))
    betan           = beta0 +0.5*(np.dot(np.dot(theta0,A01_inv),theta0.T)+np.dot(y,y.T)+np.dot(np.dot(thetan,An_inv),thetan))
    
   
    frac1   = (betan)**(-alphan)
    num1    = beta0**(alpha0)*spe.gamma(alphan)*np.sqrt(np.linalg.det(An))
    den1    = (2*np.pi)**(n/2.0)*spe.gamma(alpha0)*np.sqrt(np.linalg.det(A01))
    frac2   = num1/den1
    BF      = frac1*frac2
    
    return BF
    

    
def Function_fit(n1,n2,N,data,data_real,th02):
    """This is the main function, all initial conditions are calculated and the fact1 is called
        n1, n2: Number of data to compare the models, we recommend to use 3.
        N     : Population size of the data analyzed.
                 data, data_real : Temporal data to be analyzed
        th02  : The apriori expected value of DeltaR0 (R0-1).
        
        Returns: A list in which ach entry represents if the
                 exp model wins (True) or False otherwise
        """
    which_model   = [] #Each entry represents if the exp model wins (True) or False otherwise
    prob_exp      = []
    prob_aaa      = []
    prob_bbb      = []
    
    for l in range(np.array([n1,n2]).max(),len(data)):
        z           = [l-n1,l+1-n1,l+2-n1]       #Data window to make all comparisons
        my          = 0.3*(data[l-n1]+data[l+1-n1]+data[l+2-n1])  #average of the n values
        cov1        = (1.0/3.0)*((data[l-n1]-my)**2+(data[l+1-n1]-my)**2+(data[l+2-n1]-my)**2)  #covariance of the n vlaues
        my1         = 0.3*(np.log(data[l-n1])+np.log(data[l+1-n1])+np.log(data[l+2-n1]))  #average of n logged values 
        cov2        = (1.0/3.0)*((np.log(data[l-n1])-my1)**2+(np.log(data[l+1-n1])-my1)**2+(np.log(data[l+2-n1])-my1)**2) #covariance of the n logged values
        beta1       = 0.5*cov1 #a priori parameters of linear model 
        beta2       = 0.5*cov2 #a priori parameter of exponential model 
        Sigma1      = np.array([[10**2, 0.0],[0.0,2]])  #a priori variance-covariance matrix for linear model
        Sigma2      = np.array([[np.log(10)**2, 0.0],[0.0,1.0/(N)]]) #a priori variance-covariance matrix for exponential
        A01         = Sigma1
        A02         = Sigma2
        design      = np.array([np.ones(n1), np.linspace(0,n1-1,n1)]) #design matrix of linear model 
        design1     = np.array([np.ones(n1), (b1+mu)*np.linspace(0,n1-1,n1)]) #design matrix of exponential model 
        aaa         = fact1(data[l-n1:l]-my, n1,np.array([ 0,th01]),A01,design,beta1)  #normalization constant of linear model
        bbb         = fact1( np.log(data[l-n2:l])-my1, n2,np.array([0,th02]),A02,design1,beta2) #normalization constant of exponential model
        bet,Theta   = par_est(z,1,data_real)          #beta and Theta estimates
        accept2     = Theta[1]>0 and (aaa/bbb)<1      #only compare runs that go up
        which_model.append(accept2)  #True if exp model wins in a upwards run
        prob_exp.append(bbb/(aaa+bbb))
        prob_aaa.append(aaa)
        prob_bbb.append(bbb)            
    which_model= np.asarray(which_model)
    prob_exp = np.asarray(prob_exp)
    
    
    return which_model