""" Created on Tue Mar 17 19:27:58 2015 @author: Yury E. Garcia (yury@cimat.mx), CIMAT, Mexico. This code compares the exponencial model and linear model with bayes factor The main function is Function_fit, the user should: load the file, give the value of R0 and the population size and the run length for alarm (eg. 3,4,...). This is an implementation of the techniques presented in: Yury E. Garcia, J. Andres Christen and Marcos A. Capistran (2015), "A Bayesian Outbreak Detection Method for Influenza-Like Illness", Journal of Biomedicine and Biothechnology (revision). Please check the paper for all mathematical details and notation. """ import numpy as np from scipy import special as spe #Model's parameters b1 =1.0/7.0 #recuperation rate th01 =0.0 #theta_1 mu =0 #Births and deaths alpha0 =0.5 #alpha for the a priori def par_est(outbreak,con,data_real): """ This function makes the final theta estimation when there is an outbreak. outbreak :It is generated in show fit (file: BayesFactorFigure) outbreak saves the positions where the exponential model was true in agreement with the run length for alarm data_real :It is the temporal data than we are analyzing con :It is a variable given in Function_fit Returns the estimate. """ con = con #con == 1 logged data if con==1: z =np.log(data_real[outbreak]) #Log the data at the outbreak else: z = [] a = outbreak[-1] z = np.log(data_real[a-3:a+1]) #estimate theta (ie R0), the second entry of Xtheta DR0 n = len(z) z = np.array(z) X = np.array([np.ones(n), (b1+mu)*np.linspace(0,n-1,n)]) #design matrix X_T_X = np.dot(X,X.T) inv_X = np.dot(np.linalg.pinv(X_T_X),X) Theta = np.dot(inv_X,z) bet =(b1+mu)*(Theta[1]+1.0) #infection rate beta = R0(b1+mu) return bet, Theta def fact1(y, n, theta0, A01,design,beta0): """ This Fuction calculates the value of normalization constant p(D). It is called in Function_fit: y: Vector with the three data that we are using to compare the models. n: Sliding window (run length). theta0: Initial parameters (0 DeltaR0) exponential model, (0 0) linear model A01 : A priori covariance matrix. design: Design matrix. beta0 : beta0 of the prior distribution. Returns: The nomralization constant. """ design_T_design = np.dot(design,design.T) A01_inv = np.linalg.pinv(A01) An = A01_inv + design_T_design An_inv = np.linalg.pinv(An) alphan = alpha0 + n/2.0 thetan = np.dot(An_inv,(np.dot(A01_inv,theta0)+np.dot(design,y))) betan = beta0 +0.5*(np.dot(np.dot(theta0,A01_inv),theta0.T)+np.dot(y,y.T)+np.dot(np.dot(thetan,An_inv),thetan)) frac1 = (betan)**(-alphan) num1 = beta0**(alpha0)*spe.gamma(alphan)*np.sqrt(np.linalg.det(An)) den1 = (2*np.pi)**(n/2.0)*spe.gamma(alpha0)*np.sqrt(np.linalg.det(A01)) frac2 = num1/den1 BF = frac1*frac2 return BF def Function_fit(n1,n2,N,data,data_real,th02): """This is the main function, all initial conditions are calculated and the fact1 is called n1, n2: Number of data to compare the models, we recommend to use 3. N : Population size of the data analyzed. data, data_real : Temporal data to be analyzed th02 : The apriori expected value of DeltaR0 (R0-1). Returns: A list in which ach entry represents if the exp model wins (True) or False otherwise """ which_model = [] #Each entry represents if the exp model wins (True) or False otherwise prob_exp = [] prob_aaa = [] prob_bbb = [] for l in range(np.array([n1,n2]).max(),len(data)): z = [l-n1,l+1-n1,l+2-n1] #Data window to make all comparisons my = 0.3*(data[l-n1]+data[l+1-n1]+data[l+2-n1]) #average of the n values cov1 = (1.0/3.0)*((data[l-n1]-my)**2+(data[l+1-n1]-my)**2+(data[l+2-n1]-my)**2) #covariance of the n vlaues my1 = 0.3*(np.log(data[l-n1])+np.log(data[l+1-n1])+np.log(data[l+2-n1])) #average of n logged values cov2 = (1.0/3.0)*((np.log(data[l-n1])-my1)**2+(np.log(data[l+1-n1])-my1)**2+(np.log(data[l+2-n1])-my1)**2) #covariance of the n logged values beta1 = 0.5*cov1 #a priori parameters of linear model beta2 = 0.5*cov2 #a priori parameter of exponential model Sigma1 = np.array([[10**2, 0.0],[0.0,2]]) #a priori variance-covariance matrix for linear model Sigma2 = np.array([[np.log(10)**2, 0.0],[0.0,1.0/(N)]]) #a priori variance-covariance matrix for exponential A01 = Sigma1 A02 = Sigma2 design = np.array([np.ones(n1), np.linspace(0,n1-1,n1)]) #design matrix of linear model design1 = np.array([np.ones(n1), (b1+mu)*np.linspace(0,n1-1,n1)]) #design matrix of exponential model aaa = fact1(data[l-n1:l]-my, n1,np.array([ 0,th01]),A01,design,beta1) #normalization constant of linear model bbb = fact1( np.log(data[l-n2:l])-my1, n2,np.array([0,th02]),A02,design1,beta2) #normalization constant of exponential model bet,Theta = par_est(z,1,data_real) #beta and Theta estimates accept2 = Theta[1]>0 and (aaa/bbb)<1 #only compare runs that go up which_model.append(accept2) #True if exp model wins in a upwards run prob_exp.append(bbb/(aaa+bbb)) prob_aaa.append(aaa) prob_bbb.append(bbb) which_model= np.asarray(which_model) prob_exp = np.asarray(prob_exp) return which_model