AMBHAS
copula.py
Go to the documentation of this file.
00001 #! /usr/bin/env python
00002 # -*- coding: utf-8 -*-
00003 """
00004 Created on Wed Feb  9 19:13:28 2011
00005 
00006 @ author:                  Sat Kumar Tomer 
00007 @ author's webpage:        http://civil.iisc.ernet.in/~satkumar/
00008 @ author's email id:       satkumartomer@gmail.com
00009 @ author's website:        www.ambhas.com
00010 
00011 """
00012 from __future__ import division
00013 from scipy.stats import kendalltau, pearsonr, spearmanr
00014 import numpy as np
00015 from scipy.integrate import quad
00016 from scipy.optimize import fmin
00017 import sys
00018 import statistics as st
00019 from scipy.interpolate import interp1d
00020 from stats import scoreatpercentile
00021 
class Copula(object):
    """
    Estimate the parameter of a bivariate copula from two samples and
    generate joint random variables for that parameter.

    This class has following three copulas:
        Clayton
        Frank
        Gumbel

    Example:
        x = np.random.normal(size=100)
        y = np.random.normal(size=100)
        foo = Copula(x, y, 'frank')
        u, v = foo.generate_uv(100)
    """

    def __init__(self, X, Y, family):
        """ initialise the class with X and Y
        Input:
            X:        one dimensional numpy array
            Y:        one dimensional numpy array
            family:   clayton or frank or gumbel

            Note: the size of X and Y should be same

        Raises:
            ValueError: if X or Y is not one dimensional, if their sizes
                        differ, or if family is not a supported name
        """
        X = np.asarray(X)
        Y = np.asarray(Y)

        # check dimension of input arrays
        if not ((X.ndim == 1) and (Y.ndim == 1)):
            raise ValueError('The dimension of array should be one.')

        # input arrays should have the same size; compare by value, an
        # identity test ("is not") fails for equal sizes that are not
        # cached small integers
        if X.size != Y.size:
            raise ValueError('The size of both array should be same.')

        # check if the name of copula family is correct
        copula_family = ['clayton', 'frank', 'gumbel']
        if family not in copula_family:
            raise ValueError('The family should be clayton or frank or gumbel')

        self.X = X
        self.Y = Y
        self.family = family

        # estimate Kendall's rank correlation
        self.tau = kendalltau(self.X, self.Y)[0]

        # estimate pearson R and spearman R
        self.pr = pearsonr(self.X, self.Y)[0]
        self.sr = spearmanr(self.X, self.Y)[0]

        # estimate the parameter of copula
        self._get_parameter()

        # no (u, v) sample has been generated yet
        self.U = None
        self.V = None

    def _get_parameter(self):
        """ estimate the parameter (theta) of copula from Kendall's tau

        clayton and gumbel use a closed form in tau; frank is obtained by
        minimising _frank_fun numerically, starting the search at -5.
        """
        if self.family == 'clayton':
            self.theta = 2*self.tau/(1 - self.tau)

        elif self.family == 'frank':
            # _frank_fun is written in terms of alpha = -theta
            self.theta = -fmin(self._frank_fun, -5, disp=False)[0]

        elif self.family == 'gumbel':
            self.theta = 1/(1 - self.tau)

    def generate_uv(self, n=1000):
        """
        Generate random variables (u,v)

        Input:
            n:        number of random copula to be generated

        Output:
            U and V:  generated copula

        Raises:
            ValueError: if theta is outside the range accepted for the
                        selected family, or the family is unknown

        The generated arrays are also stored in self.U and self.V.
        """
        samplers = {
            'clayton': self._sample_clayton,
            'frank': self._sample_frank,
            'gumbel': self._sample_gumbel,
        }
        if self.family not in samplers:
            raise ValueError('The family should be clayton or frank or gumbel')

        U, V = samplers[self.family](n)

        self.U = U
        self.V = V
        return U, V

    def _sample_clayton(self, n):
        """ draw n pairs (U, V) from the clayton copula with self.theta """
        if self.theta <= -1:
            raise ValueError('the parameter for clayton copula should be more than -1')
        elif self.theta == 0:
            raise ValueError('The parameter for clayton copula should not be 0')

        U = np.random.uniform(size=n)
        W = np.random.uniform(size=n)

        # NOTE: every theta below machine epsilon, including negative
        # values in (-1, 0), takes the independent branch
        if self.theta < sys.float_info.epsilon:
            V = W
        else:
            V = U*(W**(-self.theta/(1 + self.theta)) - 1 + U**self.theta)**(-1/self.theta)
        return U, V

    def _sample_frank(self, n):
        """ draw n pairs (U, V) from the frank copula with self.theta """
        if self.theta == 0:
            raise ValueError('The parameter for frank copula should not be 0')

        U = np.random.uniform(size=n)
        W = np.random.uniform(size=n)

        if abs(self.theta) > np.log(sys.float_info.max):
            # exp(-theta) would overflow: the dependence is saturated,
            # so V is U for positive theta and 1-U for negative theta
            V = U if self.theta > 0 else 1 - U
        elif abs(self.theta) > np.sqrt(sys.float_info.epsilon):
            ratio = np.exp(-self.theta*U)*(1 - W)/W
            V = -np.log((ratio + np.exp(-self.theta))/(1 + ratio))/self.theta
        else:
            # theta is numerically zero: independent
            V = W
        return U, V

    def _sample_gumbel(self, n):
        """ draw n pairs (U, V) from the gumbel copula with self.theta """
        if self.theta < 1:
            raise ValueError('the parameter for GUMBEL copula should not be less than 1')

        if self.theta < 1 + sys.float_info.epsilon:
            # theta equal to 1: independent
            U = np.random.uniform(size=n)
            V = np.random.uniform(size=n)
            return U, V

        u = np.random.uniform(size=n)
        w = np.random.uniform(size=n)
        w1 = np.random.uniform(size=n)
        w2 = np.random.uniform(size=n)

        u = (u - 0.5)*np.pi
        u2 = u + np.pi/2
        e = -np.log(w)
        t = np.cos(u - u2/self.theta)/e
        # gamma is shared by both margins, which couples U and V
        gamma = (np.sin(u2/self.theta)/t)**(1/self.theta)*t/np.cos(u)
        s1 = (-np.log(w1))**(1/self.theta)/gamma
        s2 = (-np.log(w2))**(1/self.theta)/gamma
        U = np.array(np.exp(-s1))
        V = np.array(np.exp(-s2))
        return U, V

    def generate_xy(self, n=1000):
        """
        Generate random variables (x, y)

        Input:
            n:        number of random copula to be generated; only used
                      when no (U, V) sample exists yet, otherwise the
                      stored self.U and self.V are reused

        Output:
            X1 and Y1:  generated copula random numbers

        The generated arrays are also stored in self.X1 and self.Y1.
        """
        # if U and V are not already generated
        if self.U is None:
            self.generate_uv(n)

        # estimate inverse cdf of x and y
        self._inverse_cdf()

        # map the uniform sample through the inverse cdfs
        X1 = self._inv_cdf_x(self.U)
        Y1 = self._inv_cdf_y(self.V)
        self.X1 = X1
        self.Y1 = Y1

        return X1, Y1

    def estimate(self, data=None):
        """
        this function estimates the mean, std, iqr for the generated
        ensemble

        The ensemble is sorted by X1 and cut into 50 consecutive bins of
        equal size; the statistics of Y1 in each bin are interpolated
        linearly against the bin mean of X1.

        Input:
            data:     values of x at which the statistics are evaluated,
                      defaults to self.X; points outside the range of the
                      bin means are not extrapolated (bounds_error=False)

        Output:
            Y1_mean = mean of the simulated ensemble
            Y1_std = std of the simulated ensemble
            Y1_ll = lower limit (25th percentile) of the simulated ensemble
            Y1_ul = upper limit (75th percentile) of the simulated ensemble
        """
        nbin = 50
        # check if generate_xy has already been called,
        # if not called, call now
        try:
            self.X1
        except AttributeError:
            self.generate_xy(10000)
        # generate_xy may reuse an existing (U, V) sample of another
        # size, so always read the ensemble size from the data itself
        copula_ens = len(self.X1)

        if data is None:
            data = self.X

        # number of ensemble members in each bin; must be an integer
        # because it is used to build slice bounds
        n_ens = copula_ens // nbin
        ind_sort = self.X1.argsort()
        x_mean = np.zeros((nbin,))
        y_mean = np.zeros((nbin,))
        y_ul = np.zeros((nbin,))
        y_ll = np.zeros((nbin,))
        y_std = np.zeros((nbin,))

        for ii in range(nbin):
            members = ind_sort[n_ens*ii:n_ens*(ii + 1)]
            y_bin = self.Y1[members]
            x_mean[ii] = self.X1[members].mean()
            y_mean[ii] = y_bin.mean()
            y_std[ii] = y_bin.std()
            y_ll[ii] = scoreatpercentile(y_bin, 25)
            y_ul[ii] = scoreatpercentile(y_bin, 75)

        foo_mean = interp1d(x_mean, y_mean, bounds_error=False)
        foo_std = interp1d(x_mean, y_std, bounds_error=False)
        foo_ll = interp1d(x_mean, y_ll, bounds_error=False)
        foo_ul = interp1d(x_mean, y_ul, bounds_error=False)

        Y1_mean = foo_mean(data)
        Y1_std = foo_std(data)
        Y1_ll = foo_ll(data)
        Y1_ul = foo_ul(data)

        return Y1_mean, Y1_std, Y1_ll, Y1_ul

    def _inverse_cdf(self):
        """
        This module will calculate the inverse of CDF
        which will be used in getting the ensemble of X and Y from
        the ensemble of U and V

        The statistics module is used to estimate the CDF, which uses
        kernel method of cdf estimation

        To estimate the inverse of CDF, interpolation method is used, first cdf
        is estimated at 100 points, now interpolation function is generated
        to relate cdf at 100 points to data

        The interpolators are stored in self._inv_cdf_x and self._inv_cdf_y.
        """
        # presumably st.cpdf returns (cdf values, data points) -- confirm
        # against the statistics module
        x2, x1 = st.cpdf(self.X, kernel='Epanechnikov', n=100)
        self._inv_cdf_x = interp1d(x2, x1)

        y2, y1 = st.cpdf(self.Y, kernel='Epanechnikov', n=100)
        self._inv_cdf_y = interp1d(y2, y1)

    def _integrand_debye(self, t):
        """
        Integrand for the first order debye function
        """
        # expm1 keeps exp(t) - 1 accurate for t close to zero
        return t/np.expm1(t)

    def _debye(self, alpha):
        """
        First order Debye function

        The integration starts at machine epsilon instead of 0 so that
        the integrand is never evaluated as 0/0.
        """
        return quad(self._integrand_debye, sys.float_info.epsilon, alpha)[0]/alpha

    def _frank_fun(self, alpha):
        """
        optimization of this function will give the parameter for the frank copula

        Input:
            alpha:    minus the frank parameter (theta = -alpha), either a
                      scalar or the length one array passed by fmin

        Output:
            squared difference between (1 - tau)/4 and its value implied
            by alpha, as a plain float
        """
        # fmin hands over a length one array; use a plain float so that
        # quad gets scalar limits and fmin gets a scalar objective
        alpha = float(np.ravel(alpha)[0])
        diff = (1 - self.tau)/4.0 - (self._debye(-alpha) - 1)/alpha
        return float(diff**2)
00281         
00282         
00283 
00284 
00285 
00286 
00287 
00288 
00289 
00290 
 All Classes Namespaces Files Functions Variables