import pandas as pd
import plotly.express as pt
import numpy as np
Pranav Ahluwalia | Math 7241
The volatility index (VIX) measures the expectation of market volatility based on S&P 500 options. This paper tests the hypothesis that the volatility index satisfies the Markov property: the value of the time series at $T(n + 1)$ depends only upon the value at $T(n)$.
VIX time series data from January 1990 to November 2021 was obtained from the Chicago Board Options Exchange (Cboe) website: https://www.cboe.com/tradable_products/vix/vix_historical_data/.
VIX = pd.read_csv("VIX_history.csv")
vix_time_series = pt.line(VIX, x = "DATE", y = "CLOSE",title = "VIX January 1990 - November 2021")
vix_time_series.show()
In order to map the time series to a discrete set of states, each observation was standardized (centered on the mean and scaled by the standard deviation) and then rounded to the nearest integer.
$$y_{i} = \frac{y_{i}-\mu}{\sigma}$$
mean = VIX["CLOSE"].mean()
std = VIX["CLOSE"].std()
def normalize(val):
    # Standardize, then round to the nearest integer state
    val = (val - mean)/std
    return round(val)
VIX["CLOSE"] = VIX["CLOSE"].map(normalize)
After normalization and rounding, the time series can occupy 10 states. Let $S$ denote the set of states: $$S = \{-1, 0, 1, 2, 3, 4, 5, 6, 7, 8\}$$ Observe that the discretized data is far more readable and more closely resembles the behavior of a state transition system.
pt.line(VIX, x = "DATE", y = "CLOSE", title = "Normalized and Rounded VIX Data")
Below is a histogram representing the occupation frequencies of the time series in each state. The series spends the majority of its time in states -1 and 0.
pt.histogram(VIX,"CLOSE",histnorm = "probability density")
The Python class below takes in a set of states $S$ and an array representing a state history, e.g. $\begin{bmatrix}-1 & 0 & 1 & 1 & 2 & \dots \end{bmatrix}$. The output is a Markov chain based on the state transition probabilities observed in the sequence. The class also provides the ability to simulate the derived Markov chain over an arbitrary sequence length.
import random
'''
This module takes a set of states and a sequence of observed states,
and computes a transition matrix from the observed transitions.
'''
class time_series_to_markov:
    def __init__(self, states, sequence):
        """
        Prepares the data structures for conversion to a Markov model.
        """
        self.states = sorted(states)
        # Maps each state to its row/column index in the transition matrix
        self.state_to_int = {}
        for i in range(len(self.states)):
            self.state_to_int[self.states[i]] = i
        self.sequence = sequence
        self.transitionmatrix = np.zeros(shape=(len(states), len(states)))

    def getTransitions(self):
        """
        Estimates the transition matrix: divides the number of jumps from each
        state to every other state by the total number of jumps out of that state.
        """
        # Total number of jumps out of each state
        states_times = {}
        for i in self.states:
            states_times[i] = 0
        # Traverse the sequence; count each observed transition from i to j
        for i in range(len(self.sequence) - 1):
            s = self.sequence[i]
            s_next = self.sequence[i + 1]
            s_int = self.state_to_int[s]
            s_next_int = self.state_to_int[s_next]
            self.transitionmatrix[s_int][s_next_int] += 1
            # Increment the time spent in each state
            states_times[s] = states_times[s] + 1
        # Normalize each row so that it sums to 1
        for i in self.states:
            row_sum = sum(self.transitionmatrix[self.state_to_int[i]])
            self.transitionmatrix[self.state_to_int[i]] = self.transitionmatrix[self.state_to_int[i]]/row_sum
        return self.transitionmatrix

    def simulate(self, starting_state, steps):
        """
        Simulates the chain for `steps` steps from `starting_state`. Returns the
        sequence of matrix indices visited; index i corresponds to state self.states[i].
        """
        output_sequence = []
        step = 0
        matrix_state = self.state_to_int[starting_state]
        while step < steps:
            output_sequence.append(matrix_state)
            # Sample the next state from the current row of the transition matrix
            r = random.random()
            p = 0
            jump_distribution = self.transitionmatrix[matrix_state]
            for i in range(len(jump_distribution)):
                p += jump_distribution[i]
                if r <= p:
                    matrix_state = i
                    break
            step += 1
        return np.asarray(output_sequence)
Now we will instantiate the Markov chain object and use it to produce a transition matrix.
sequence = VIX["CLOSE"]
A = time_series_to_markov([-1,0,1,2,3,4,5,6,7,8],sequence)
Below is the transition matrix extracted from the discretized time series data.
P = A.getTransitions()
P = np.matrix(P)
P
matrix([[9.32761578e-01, 6.72384220e-02, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [5.93579649e-02, 8.87946699e-01, 5.17867959e-02, 9.08540279e-04, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 1.35261923e-01, 8.15480844e-01, 4.76935106e-02, 1.56372166e-03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 1.98776758e-01, 7.09480122e-01, 8.86850153e-02, 3.05810398e-03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 7.63358779e-03, 2.29007634e-01, 7.02290076e-01, 5.34351145e-02, 7.63358779e-03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 3.33333333e-02, 2.33333333e-01, 4.66666667e-01, 1.33333333e-01, 1.00000000e-01, 3.33333333e-02, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.16666667e-02, 2.50000000e-01, 3.75000000e-01, 2.91666667e-01, 0.00000000e+00, 4.16666667e-02],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e-01, 4.50000000e-01, 3.50000000e-01, 1.00000000e-01, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.42857143e-01, 2.85714286e-01, 2.85714286e-01, 2.85714286e-01],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 3.33333333e-01, 6.66666667e-01, 0.00000000e+00]])
The function getStationary(P, runs) approximates the stationary distribution of the chain by repeatedly squaring the transition matrix: after $n$ squarings the result is $P^{2^{n}}$, so every row converges to the stationary distribution.
def getStationary(P, runs):
    # Square the matrix `runs` times, yielding P**(2**runs)
    for i in range(0, runs):
        P = np.matmul(P, P)
    return P
W = getStationary(P,20)
W[0]
matrix([[3.62184379e-01, 4.10268549e-01, 1.59832409e-01, 4.08534257e-02, 1.63666256e-02, 3.74807526e-03, 2.99846087e-03, 2.49871722e-03, 8.74551002e-04, 3.74807585e-04]])
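As an independent check (this snippet is an addition, not part of the original analysis), the stationary distribution can also be computed as the left eigenvector of $P$ associated with eigenvalue 1, normalized to sum to 1:

# Left eigenvectors of P are the right eigenvectors of P transposed
eigvals, eigvecs = np.linalg.eig(np.asarray(P).T)
# Pick the eigenvector whose eigenvalue is closest to 1 and normalize it
w = np.real(eigvecs[:, np.argmin(np.abs(eigvals - 1))])
w / w.sum()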
We can compare the stationary distribution calculated from the transition matrix to the occupation distribution observed in the time series. Let $W$ denote the empirical distribution and let $W'$ denote the stationary distribution of the chain. The entries are rounded for display, so the displayed distributions may not sum to exactly 1. $$W \approx \begin{bmatrix}0.36 & 0.41 & 0.15 & 0.04 & 0.016 & 0.003 & 0.0029 & 0.0024 & 0.00087 & 0.00037\end{bmatrix}$$ $$W' \approx \begin{bmatrix}0.3622 & 0.4103 & 0.1598 & 0.0409 & 0.0164 & 0.0037 & 0.0030 & 0.0025 & 0.00087 & 0.00037\end{bmatrix}$$
The empirical and derived stationary distributions agree closely.
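For reference, the empirical occupation frequencies can be computed directly from the discretized series (a small addition, not part of the original analysis):

# Fraction of time the series spends in each state
VIX["CLOSE"].value_counts(normalize=True).sort_index()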
We will now simulate the Markov chain to produce a synthetic time series.
sim = A.simulate(0,8039)
Comparing the time series of the original VIX to the simulation, we can see that the behavior of both data sets appears similar.
pt.line(VIX["CLOSE"],title = "Original VIX Transitions")
# Convert matrix indices (0..9) back to states (-1..8)
sim = sim - 1
pt.line(sim,title = "Simulated VIX Transitions")
The histogram below displays the occupation frequencies for our simulation. The simulated occupation frequencies are similar to the derived stationary distribution, which is in turn similar to the empirical distribution originally obtained from the data.
simulation_hist = pt.histogram(sim,histnorm = "probability density")
simulation_hist.update_layout(title_text = "Occupation Freq of Simulation")
We will examine a plot of the autocorrelation functions of the simulated time series and the original data. The blue line displays the autocorrelation plot for the simulation; the red line displays the empirical autocorrelation. The behavior of the two functions appears similar.
def autocorr(x):
    # Full (unnormalized) autocorrelation; keep only the non-negative lags
    result = np.correlate(x, x, mode='full')
    return result[result.size // 2:]
a = autocorr(sim)
b = autocorr(VIX["CLOSE"])
data = {"Simulation":a,"Original":b}
df = pd.DataFrame(data)
autocorr_graph = pt.line(df,x = df.index,y = ["Simulation","Original"],title = "Autocorrelation Plot")
autocorr_graph.show()
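Note that np.correlate applied to the raw series yields an unnormalized quantity; a more standard autocorrelation subtracts the mean and scales by the lag-0 value. A minimal sketch of that variant (an addition, not part of the original analysis):

def autocorr_normalized(x):
    # Remove the mean, correlate, keep non-negative lags, and scale so lag 0 equals 1
    x = np.asarray(x, dtype=float)
    x = x - x.mean()
    result = np.correlate(x, x, mode='full')[x.size - 1:]
    return result / result[0]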
Let $\hat{p}$ denote the estimated transition matrix and let $N_{ij}$ denote the observed probability of going from state $i$ to state $j$ in two steps. Define

$$q_{ij} = \sum_{k}\hat{p}_{ik}\hat{p}_{kj}$$
$$N_{i} = \sum_{j}N_{ij}$$
$$M_{ij} = N_{i}q_{ij}$$

We will employ a goodness-of-fit test to compare the observed frequencies $N_{ij}$ with the expected frequencies $M_{ij}$. Below we declare a function to compute each quantity stated above.
def q_ij(P, states):
    # Two-step transition probabilities: the matrix square of P, built element-wise
    q = np.zeros((states, states))
    for i in range(states):
        for k in range(states):
            for j in range(states):
                q[i][j] += P[i][k]*P[k][j]
    return q

def N_ij(sequence, states):
    # Observed two-step transition proportions
    N = np.zeros((states, states))
    # Shift states (-1..8) to matrix indices (0..9) on a copy,
    # so the original sequence is not mutated
    seq = [s + 1 for s in sequence]
    for i in range(len(seq) - 2):
        curr = seq[i]
        plus_2 = seq[i + 2]
        N[curr][plus_2] += 1
    N = N/(len(seq) - 2)
    return N

def N_i(N, states):
    # Row marginals: total observed probability of a two-step jump starting in state i
    out = []
    for i in range(states):
        out.append(sum(N[i]))
    return out

def M_ij(N_i, q_ij, states):
    # Expected two-step proportions under the fitted chain
    M = np.zeros((states, states))
    for i in range(states):
        for j in range(states):
            M[i][j] = N_i[i]*q_ij[i][j]
    return M
P = A.transitionmatrix
q = q_ij(P,10)
N = N_ij(sequence,10)
Ni = N_i(N,10)  # renamed so the result does not shadow the N_i function
M = M_ij(Ni,q,10)
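As a sanity check (an addition, not part of the original analysis), the element-wise construction of $q$ should match the matrix square of $\hat{p}$ up to floating-point error:

# q was built with explicit loops; it should equal P @ P
assert np.allclose(q, np.matmul(P, P))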
import scipy.stats
M
array([[3.17010438e-01, 4.44019936e-02, 1.26293542e-03, 2.21567618e-05, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [4.44019936e-02, 3.28451940e-01, 3.63173373e-02, 1.61103282e-03, 6.63744493e-05, 1.14150951e-06, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.27670761e-03, 3.66383671e-02, 1.08369389e-01, 1.16417320e-02, 1.04998441e-03, 3.64794279e-05, 1.89812601e-06, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 1.09394364e-03, 1.23608109e-02, 2.16964068e-02, 5.13578587e-03, 3.39152060e-04, 4.41343012e-05, 1.24424537e-05, 4.14748455e-06, 0.00000000e+00],
       [0.00000000e+00, 1.68299021e-05, 9.30829124e-04, 5.30473221e-03, 8.57879821e-03, 1.06064954e-03, 2.50170886e-04, 1.23387665e-04, 2.90323919e-05, 5.18435569e-06],
       [0.00000000e+00, 0.00000000e+00, 3.13813454e-05, 3.45800700e-04, 1.04990032e-03, 1.02157985e-03, 6.11292637e-04, 4.85551941e-04, 1.30942012e-04, 5.62872903e-05],
       [0.00000000e+00, 0.00000000e+00, 9.49805622e-07, 5.33790760e-05, 3.08235670e-04, 7.22089724e-04, 9.12359536e-04, 7.47584090e-04, 1.94931774e-04, 4.66592012e-05],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 8.29496910e-06, 1.04723985e-04, 4.83181950e-04, 8.80599845e-04, 7.27439165e-04, 1.66491880e-04, 1.17758936e-04],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 5.18435569e-06, 5.59910414e-05, 1.94191152e-04, 2.77437092e-04, 2.61884024e-04, 7.62840908e-05],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.24424537e-05, 9.15409090e-05, 1.14648323e-04, 8.35421888e-05, 7.10997352e-05]])
N
array([[3.28729625e-01, 3.37190494e-02, 1.24424537e-04, 1.24424537e-04, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [3.39678985e-02, 3.49508523e-01, 2.68756999e-02, 3.73273610e-04, 1.24424537e-04, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 2.73733980e-02, 1.20318527e-01, 1.07005101e-02, 6.22122683e-04, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 1.16959064e-02, 2.45116337e-02, 4.35485878e-03, 1.24424537e-04, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 2.48849073e-04, 4.97698146e-03, 9.70511385e-03, 1.24424537e-03, 0.00000000e+00, 0.00000000e+00, 1.24424537e-04, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.24424537e-03, 1.11982083e-03, 4.97698146e-04, 8.70971756e-04, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.48849073e-04, 4.97698146e-04, 1.49309444e-03, 6.22122683e-04, 1.24424537e-04, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 7.46547219e-04, 7.46547219e-04, 4.97698146e-04, 2.48849073e-04, 2.48849073e-04],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.48849073e-04, 2.48849073e-04, 2.48849073e-04, 1.24424537e-04],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.48849073e-04, 1.24424537e-04, 0.00000000e+00]])
In order to perform a $\chi^{2}$ test, we must remove entries with an expected frequency of 0. This does not harm the validity of the test, since there are no entries where the expected frequency is 0 but the empirical frequency is non-zero.
def clean_matrices(M, N):
    # Flatten both matrices and drop every cell whose expected frequency is 0
    Mf = M.flatten()
    Nf = N.flatten()
    to_delete = []
    for i in range(len(Mf)):
        if Mf[i] == 0:
            to_delete.append(i)
    Mf = np.delete(Mf, to_delete)
    Nf = np.delete(Nf, to_delete)
    return (Mf, Nf)
M_clean, N_clean = clean_matrices(M,N)
(M_clean,N_clean)
(array([3.17010438e-01, 4.44019936e-02, 1.26293542e-03, 2.21567618e-05,
        4.44019936e-02, 3.28451940e-01, 3.63173373e-02, 1.61103282e-03,
        6.63744493e-05, 1.14150951e-06, 1.27670761e-03, 3.66383671e-02,
        1.08369389e-01, 1.16417320e-02, 1.04998441e-03, 3.64794279e-05,
        1.89812601e-06, 1.09394364e-03, 1.23608109e-02, 2.16964068e-02,
        5.13578587e-03, 3.39152060e-04, 4.41343012e-05, 1.24424537e-05,
        4.14748455e-06, 1.68299021e-05, 9.30829124e-04, 5.30473221e-03,
        8.57879821e-03, 1.06064954e-03, 2.50170886e-04, 1.23387665e-04,
        2.90323919e-05, 5.18435569e-06, 3.13813454e-05, 3.45800700e-04,
        1.04990032e-03, 1.02157985e-03, 6.11292637e-04, 4.85551941e-04,
        1.30942012e-04, 5.62872903e-05, 9.49805622e-07, 5.33790760e-05,
        3.08235670e-04, 7.22089724e-04, 9.12359536e-04, 7.47584090e-04,
        1.94931774e-04, 4.66592012e-05, 8.29496910e-06, 1.04723985e-04,
        4.83181950e-04, 8.80599845e-04, 7.27439165e-04, 1.66491880e-04,
        1.17758936e-04, 5.18435569e-06, 5.59910414e-05, 1.94191152e-04,
        2.77437092e-04, 2.61884024e-04, 7.62840908e-05, 1.24424537e-05,
        9.15409090e-05, 1.14648323e-04, 8.35421888e-05, 7.10997352e-05]),
 array([3.28729625e-01, 3.37190494e-02, 1.24424537e-04, 1.24424537e-04,
        3.39678985e-02, 3.49508523e-01, 2.68756999e-02, 3.73273610e-04,
        1.24424537e-04, 0.00000000e+00, 0.00000000e+00, 2.73733980e-02,
        1.20318527e-01, 1.07005101e-02, 6.22122683e-04, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 1.16959064e-02, 2.45116337e-02,
        4.35485878e-03, 1.24424537e-04, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 2.48849073e-04, 4.97698146e-03,
        9.70511385e-03, 1.24424537e-03, 0.00000000e+00, 0.00000000e+00,
        1.24424537e-04, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        1.24424537e-03, 1.11982083e-03, 4.97698146e-04, 8.70971756e-04,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        2.48849073e-04, 4.97698146e-04, 1.49309444e-03, 6.22122683e-04,
        1.24424537e-04, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        7.46547219e-04, 7.46547219e-04, 4.97698146e-04, 2.48849073e-04,
        2.48849073e-04, 0.00000000e+00, 0.00000000e+00, 2.48849073e-04,
        2.48849073e-04, 2.48849073e-04, 1.24424537e-04, 0.00000000e+00,
        0.00000000e+00, 2.48849073e-04, 1.24424537e-04, 0.00000000e+00]))
Next, we will run a $\chi^{2}$ test:
scipy.stats.chisquare(f_obs = N_clean, f_exp=M_clean)
Power_divergenceResult(statistic=0.02274031861807105, pvalue=1.0)
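For reference (this check is an addition, not part of the original analysis), scipy.stats.chisquare uses $k - 1$ degrees of freedom by default, where $k$ is the number of retained cells, so the p-value can be reproduced from the $\chi^{2}$ survival function:

from scipy.stats import chi2
# Recompute the p-value from the reported statistic and the default degrees of freedom
chi2.sf(0.02274031861807105, df=len(N_clean) - 1)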
The null hypothesis of the $\chi^{2}$ test is that the observed data come from the expected distribution. The result above shows a failure to reject this hypothesis at the 0.05 significance level. Therefore, the two-step transition probabilities in the observed time series are consistent with the expected distribution, supporting the hypothesis that the VIX series behaves like a Markov chain.