# Epsilon_Machine_Reconstruction.py
# This progam contains functions for topological epsilon machine reconstruction from data sequences and topological epsilon machine reconstruction from ECA data sequences,
# which is slightly different (as I've written it) and involves a cutoff paramater. Also there are some functions to generate data sequences and some wrapper
# functions for constructing (or attempting to construct) machines for all ECAs and processing these reconstruction results.
# There are 13 functions in total:

#(1) convert_DFA_to_hidden_markov_chain
#(2) generate_data
#(3) generate_CA_data
#(4) plot_sequence_data
#(5) plot_sequence_data_machines
#(6) remove_redundancies_from_list
#(7) construct_epsilon_machince
#(8) construct_epsilon_machine_ECA
#(9) construct_ECA_domain_machines
#(10) convert_ECA_domain_machines_to_process_graphs
#(11) extract_process_graphs
#(12) test_process_graphs_as_domains

from Graph_Functions import *
from Automata_Functions import *
from CA_Functions import *
from pylab import *
from numpy import*
from numpy.random import randint
from string import *


#(1) Define convert_DFA_to_hidden_markov_chain(M)
#Input - a DFA M given as a list of N+1 states of the form [accept?, [IS = 0 transition, Is = 1 transitition]] where state 0 is the unique start state and state N is the "junk state".
#Output - a hiddne markov chain HMC given as an array of N by N arrays of transition probabilities p_ij = probabability of going from state i to state j on symbol s.
#         The number of such arrays = number of symbols used in the DFA and the probabilities p_ij are uniformly distributed among possible transitions
#         i.e. if in state 3 you can to state 4 on symbol 0, or state 5 on symbol 1, but no other states on any input symbol then
#         p_34 in array 0 = p_35 in array 1 = 1/2 and p_3j = 0 for all other j in all other arrays.

def convert_DFA_to_hidden_markov_chain(M):

    num_symbols = len(M[0][1])
    N = len(M) - 1

    HMC = zeros((num_symbols,N,N),float)

    for state in xrange(N):
        num_transitions = 0
        
        for symbol in xrange(num_symbols):

            if M[state][1][symbol] != N:
                num_transitions = num_transitions + 1

        #print ''
        #print 'num_transitions =', num_transitions
        for symbol in xrange(num_symbols):

            if M[state][1][symbol] != N:
                transition = M[state][1][symbol]
                #print 'transition =', transition
                HMC[symbol][state][transition] = 1.0/num_transitions

    return HMC


#(2) define generate_data(HMC)
# This function generates time series data for a hidden markov chain HMC given as an array of s transition matrices T(s). The num_data_points values are generated and the machine
# is assumed to start in state 0. Note the start is irrevelevant if generating many data points assuming the associated graph has only one strongly connected component.

def generate_data(HMC,num_data_points):

    num_symbols = len(HMC)
    N = len(HMC[0][0])

    # (i) create the transition list, a list of transitions for each state of the form [[x_min,x_max],observe_symbol,next_state]
    transition_list = []

    for n in xrange(N):
        n_transition_list = []
        x = 0
        
        for symbol in xrange(num_symbols):
            for transition_state in xrange(N):

                prob = HMC[symbol][n][transition_state]
                if prob != 0:
                    n_transition_list.append([[x, x + prob], symbol, transition_state])
                    x = x + prob

        transition_list.append(n_transition_list)

    # (ii) use the transition list to generate the time series data
    machine_state = 0
    data = []

    for x in xrange(num_data_points):
         p = random.uniform(0.0,1.0)
         
         for t in transition_list[machine_state]:
             if t[0][0] <= p and t[0][1] >= p:
                 data.append(t[1])
                 machine_state = t[2]


    return data


# (3) define generate_CA_data(Range, rule_number, num_iterations, num_cells)
# This function takes a CA range, rule number and iterates from a random initial condition on a 1D lattice with num_cells cells and returns the result as final_state
def generate_CA_data(Range, rule_number, num_iterations, num_cells):

    # determine the CA lookup table
    lookup_table = DetermineRule(Range, rule_number)
    #print 'lookup_table =', lookup_table

    # generate a random initial state 
    state = 7*ones(num_cells,int)
    for cell in xrange(num_cells):
        state[cell] = randint(0,2)

    # iteterate the cellular automata for T timesteps
    for t in xrange(num_iterations):
        state = IterateState(state, num_cells, Range, lookup_table)

    final_state = state
    return final_state


# (4) define plot_sequence_data(sequence_data)
def plot_sequence_data(sequence_data):
    num_data_points = len(sequence_data)
    print 'data =', sequence_data

    #convert to square wave form for plotting
    converted_data = []
    for y in xrange(len(sequence_data)):
        converted_data.append(data[y])
        converted_data.append(data[y])
    #print 'converted_data =', converted_data

    #the corresponding time series data
    t_points = [0]
    for t in xrange((num_data_points - 1)*2):
        t_points.append(t/2 + 1)
    t_points.append(num_data_points)
    #print 't_points =', t_points

    #plot
    plot(t_points,converted_data)
    axis([0, num_data_points+1, -2, 2])  

    show()


# (5) define plot_sequence_data_machines(machines)
def plot_sequence_data_machines(machines, num_data_points):
    sequence_data = []
    N = len(machines)

    #generated_data
    for m in machines:
        HMC = convert_DFA_to_hidden_markov_chain(m)
        data = generate_data(HMC,num_data_points)
        #print 'data =', data
        sequence_data.append(data)

    #convert to square wave form for plotting
    print ''
    for x in xrange(N):
        data = sequence_data[x]
        #print 'data = ', data
        converted_data = []
        
        for y in xrange(len(data)):
            converted_data.append(data[y])
            converted_data.append(data[y])

        sequence_data[x] = converted_data

    #plot data
    t_points = [0]
    for t in xrange((num_data_points - 1)*2):
        t_points.append(t/2 + 1)
    t_points.append(num_data_points)
    #print 't_points =', t_points
    
    for x in xrange(N):
        subplot(N,1,x+1)
        plot(t_points,sequence_data[x])
        axis([0, num_data_points+1, -2, 2])

    savefig('some_machines_data.jpg', dpi=600)    

    show()


# (6) define remove_redundancies_from_list(L)
# This function takes a list L and returns a new list LL which is the 'set of L' (all redundant elements have been removed). i.e. [1, 2, 2, 3, 2, 4, 1, 5, 7, 2] --> [1,2,3,4,5,7]. It is used in function (3)
def remove_redundancies_from_list(L):
    LL = []

    for i in xrange(len(L)):
        add = 1
        
        for j in xrange(len(LL)):
            if L[i] == LL[j]:add = 0

        if add == 1: LL.append(L[i])

    #LL = sort(LL)
    return LL
            
        
#(7) define construct_epsilon_machine(data,num_sybmols)
# This function determines a topological (non-probabilistic) epsilon machine M (i.e. a DFA) accepting words from a language sample 'data' generated by some presumably
# hidden markov model source like a CA. M is a list of states of the form [accept?, [IS = 0 transition, Is = 1 transitition]] where state 0 is the unique start state and state N is the "junk state".
# The language sample 'data' is simply a list of symbols from some finite alphabet i.e. data = [0,1,0,0,1,0,1,1,1,1,0...]. The sample data should be long for an accuracte result i.e. of order 10,000+ symbols.

def construct_epsilon_machince(data,num_symbols):

    num_data_points = len(data)
    #D = num_data_points/2
    D = 30                     #note tree does not need to be that deep because we only look at first several levels to find equivalence classes anyway max size of machine is O(D) but bit less

    # (i) first determine paths of length l for l = 1,2,3, ...D and store these in the variable 'paths' which is a list of lists l_k where each l_k is a list of all length k paths
    Paths = []

    for k in range(1,D):
        k_paths = []
        
        for x in xrange(num_data_points - k):
            next_k_path = data[x:x + k]
            k_paths.append(next_k_path)

        k_paths = remove_redundancies_from_list(k_paths)
        Paths.append(k_paths)

    #for p in Paths:
    #    print p


    #(ii) form data tree of Depth D = num_data_points/2. The Tree T is constructed by levels L0,L1,L2, etc... each level is list of states of form [node_nubmer, path, [IS0_transtions, IS1_transitions]]
    # if there is no transition to another node on a given symbol we say IS0_transitions = 'X' or IS1_transitions = 'X'. path is is a list of 0's and 1's used to reach given node from node 0
    
    T = []
    T = [[ [0,[],[]] ]] #the 0th node
    num_nodes = 0

    for k in range(D - 1):
        T_kp1 = []

        for node in T[k]:
            for symbol in xrange(num_symbols):
                #print 'node =', node
                #print 'symbol =', symbol
                
                possible_transition_path = node[1] + [symbol]
                #print 'possible_transition_path =', possible_transition_path
                good = 0
                
                for word in Paths[k]:
                    #print 'word = ', word
                    
                    if word == possible_transition_path:
                        num_nodes = num_nodes + 1
                        T_kp1.append( [num_nodes,word,[]] )
                        node[2].append(num_nodes)
                        good = 1

                if good == 0: node[2].append('X')
                #print 'good = ', good

                        
        if k != D - 2: T.append(T_kp1)
  
        #print 'T_k =', T[k]
        #print 'T_kp1 =', T_kp1
        #print ''


    #convert tree T to a new tree-graph TT which is simply a list of the nodes and their connections
    TT = []
    for level in xrange(D-1):
        #print ''
        #print 'level =', level
        for node in T[level]:
            #print 'node =', node
            TT.append(node)

    #for tt in TT:
        #print tt
        #print ''
    #print 'data =', data

    print 'done Tree construction'


    #loop looking at depth L trees (machines with max 2^L states) until the target machine is reached. (number of states does not increase as L increases).
    old_machine_size = 0
    equality_count = 0
    L = 2
    
    while equality_count < 1:
        
        # construct the list of L level subtrees of the orginal tree. Each of these is given as a tree-graph, which is simply a list of the nodes and their connections
        L = L + 1
        print ''
        print 'L =', L
        List_of_L_Subtrees = []
    
        #compute max node up to depth L in tree TT
        max_node = 0
        for i in T[L]:# NOTE - we assume here that all level L subtrees have roots in first L levels of main tree T. (I believe this is ok for a processs graph DFA domain)
            if i[0] > max_node: max_node = i[0]
        print 'max_node =', max_node
    
    
        for node in xrange(max_node + 1):                  #(num_data_points/4): #xrange(len(TT)/100):
            #print ''
            #print 'node =', node
        

            STL = [ [[0, node],['temp','temp']] ]
            num_STL_nodes = 0
            node_names = set(['X',node])
            #node_names = set(['X',0])
   

            for k in xrange(L):
                new_STL_nodes = []
            
                for sub_tree_node in STL:
                    for symbol in xrange(num_symbols):
                    
                        node_number = sub_tree_node[0][1]
                        #print 'TT[node_number] =', TT[node_number]
                        IS_transition = TT[node_number][2][symbol]
                        #print 'IS_transition =', IS_transition
            
                        if IS_transition not in node_names:
                            node_names = node_names | set([IS_transition])
                            num_STL_nodes = num_STL_nodes + 1
                            new_STL_node = [[num_STL_nodes, IS_transition],['temp','temp']]
                            #print 'new_STL_node =', new_STL_node
                            new_STL_nodes.append(new_STL_node)
                            sub_tree_node[1][symbol] = num_STL_nodes
                        
                        if IS_transition == 'X': sub_tree_node[1][symbol] = 'X'

                STL = STL + new_STL_nodes
                #print 'new_STL_nodes =', new_STL_nodes

            for sub_tree_node in STL:
                sub_tree_node[0] = sub_tree_node[0][0]

            #print 'STL =', STL
            List_of_L_Subtrees.append(STL)
        

        List_of_L_Subtrees = remove_redundancies_from_list(List_of_L_Subtrees)
        #print ''
        #print 'List_of_L_Subtrees'
        #for L_Subtree in List_of_L_Subtrees:
            #print L_Subtree
            #print ''
        new_machine_size = len(List_of_L_Subtrees)    
        print 'number of L_Subtrees =', new_machine_size

        if new_machine_size == old_machine_size: equality_count = equality_count + 1
        if new_machine_size != old_machine_size: equality_count = 0
        old_machine_size = new_machine_size

        if L > 10:
            print ''
            print 'ERROR !!!!'
            break

    num_machine_states = new_machine_size

    #once we have the list of L level subtrees we classify each of the nodes in the upper part of the tree ("solid domain region") by their L_subtree
    in_solid_domain = 1
    node = -1
    nodelabels = []
    
    while in_solid_domain == 1:
        
        node = node + 1
        STL = [ [[0, node],['temp','temp']] ]
        num_STL_nodes = 0
        node_names = set(['X',node])
   

        for k in xrange(L):
            new_STL_nodes = []
            
            for sub_tree_node in STL:
                for symbol in xrange(num_symbols):
                    
                    node_number = sub_tree_node[0][1]
                    #print 'TT[node_number] =', TT[node_number]
                    IS_transition = TT[node_number][2][symbol]
                    #print 'IS_transition =', IS_transition
            
                    if IS_transition not in node_names:
                        node_names = node_names | set([IS_transition])
                        num_STL_nodes = num_STL_nodes + 1
                        new_STL_node = [[num_STL_nodes, IS_transition],['temp','temp']]
                        #print 'new_STL_node =', new_STL_node
                        new_STL_nodes.append(new_STL_node)
                        sub_tree_node[1][symbol] = num_STL_nodes
                        
                    if IS_transition == 'X': sub_tree_node[1][symbol] = 'X'

            STL = STL + new_STL_nodes
            #print 'new_STL_nodes =', new_STL_nodes

        for sub_tree_node in STL:
            sub_tree_node[0] = sub_tree_node[0][0]

        #print 'STL =', STL
         
        nodelabel = 'XX'
        for i in xrange(len(List_of_L_Subtrees)):
            if STL == List_of_L_Subtrees[i]:
                nodelabel = i
                nodelabels.append(nodelabel)
                
        if nodelabel == 'XX': in_solid_domain = 0
        if node >= len(TT)/2: in_solid_domain = 0

    num_solid_domain_nodes = node
    #print 'num_solid_domain_nodes =', num_solid_domain_nodes

    #once we have individiual nodes classified we construct the corresponding DFA from their connections
    
    #first consturct a hidden markov chain transition matrix representation for the DFA
    MM_HMC = zeros((num_symbols, num_machine_states, num_machine_states), float)
    for s in xrange(num_symbols):
        for x in xrange(num_solid_domain_nodes):
            
            i = nodelabels[x]
            j_node = TT[x][2][s]
            if j_node != 'X'and j_node < num_solid_domain_nodes:
                j = nodelabels[j_node]
                MM_HMC[s][i][j] = 1

    #next convert this to a list of states form of DFA
    MM = []
    for x in xrange(num_machine_states):
        MM.append([1,['X','X']])

    for s in xrange(num_symbols):
        for x in xrange(num_machine_states):
            for y in xrange(num_machine_states):

                if MM_HMC[s][x][y] == 1:MM[x][1][s] = y

    #finally add in a 'junk state' everything transitions to on 'X' (when it wouldn't have anywhere to go)
    MM.append([0,[num_machine_states,num_machine_states]])
    for state in xrange(len(MM)):
        for symbol in xrange(num_symbols):
            if MM[state][1][symbol] == 'X':MM[state][1][symbol] = num_machine_states


    return MM, MM_HMC


#(8) define construct_epsilon_machine_ECA(data,num_sybmols)
# This function determines a topological (non-probabilistic) epsilon machine M (i.e. a DFA) hopefully accepting all and only words from the domain of a CA used to generate the language sample 'data'
# It is assumed that the CA only has one recurrent domain otherwise there may be problems


def construct_epsilon_machine_ECA(data, num_symbols, cut_off, max_L, min_L, num_times_equal_machine_required, D, max_num_nodes_in_tree):

    num_data_points = len(data)
    print 'num_symbols =', num_symbols
    print 'cut_off =', cut_off
    print 'max_L =', max_L
    print 'min_L =', min_L
    print 'num_times_equal_machine_required =', num_times_equal_machine_required
    print 'D = ',D
    print 'max_num_nodes_in_tree =', max_num_nodes_in_tree
    
    #D = num_data_points/2
    #D = 35                     #note tree does not need to be that deep because we only look at first several levels to find equivalence classes anyway max size of machine is O(D) but bit less

    # (i) first determine paths of length l for l = 1,2,3, ...D and store these in the variable 'paths' which is a list of lists l_k where each l_k is a list of all length k paths
    Paths = []
    current_num_nodes_in_tree = 1

    for k in range(1,D):
        k_paths = []
        
        for x in xrange(num_data_points - k):
            next_k_path = data[x:x + k]
            next_k_path = list(next_k_path)
            #print 'next_k_path =', next_k_path
            #k_paths.append(next_k_path)
            k_paths = k_paths + [next_k_path]

        #print 'k_paths =', k_paths
        k_paths = remove_redundancies_from_list(k_paths)
        Paths.append(k_paths)
        current_num_nodes_in_tree = current_num_nodes_in_tree + len(k_paths)
        if current_num_nodes_in_tree > max_num_nodes_in_tree:
            D = k
            print 'D = ', D
            break

    #print 'Paths = '  
    #for k in xrange(D-1):
    #    print Paths[k]


    # (ii) next determine the probabilities of each of these paths and remove those with probabilities that are to low (these correspond to erroneous non-domain paths where walls occur)
    # low is defined as 1/10 of maximum probability or less
    PPaths = []
    for k in range(D-1):
        kk_paths = []
        
        for path in Paths[k]:
            kk_paths.append([path,0])

        PPaths.append(kk_paths)

    #print 'PPaths ='
    #for level in PPaths:
    #    print level


    for k in range(1,D):
        
        for x in xrange(num_data_points - k):
            k_path = data[x:x+k]
            k_path = list(k_path)
            
            for y in PPaths[k-1]:
                #print 'y =', y
                if k_path == y[0]:y[1] = y[1] + 1

    #print 'Paths'
    #for p in Paths:
    #    print p

    #print 'PPaths'
    #for pp in PPaths:
    #    print pp


    #(iii) form data tree of Depth D = num_data_points/2. The Tree T is constructed by levels L0,L1,L2, etc... each level is list of states of form [node_nubmer, path, [IS0_transtions, IS1_transitions]]
    # if there is no transition to another node on a given symbol we say IS0_transitions = 'X' or IS1_transitions = 'X'. path is is a list of 0's and 1's used to reach given node from node 0
    T = []
    T = [[ [0,[],[]] ]] #the 0th node
    num_nodes = 0

    for k in range(D - 1):
        T_kp1 = []

        for node in T[k]:
            transition_non_normalized_probabilities = [0.0, 0.0]
            
            for symbol in xrange(num_symbols):
                #print 'node =', node
                #print 'symbol =', symbol
                
                possible_transition_path = node[1] + [symbol]
                #print 'possible_transition_path =', possible_transition_path
                
                for i in xrange(len(PPaths[k])):
                    word = PPaths[k][i][0]
                    #print 'word = ', word
                    if word == possible_transition_path:
                        transition_non_normalized_probabilities[symbol] = 1.0*PPaths[k][i][1]

            if transition_non_normalized_probabilities[0]/sum(transition_non_normalized_probabilities) >= cut_off:
                num_nodes = num_nodes + 1
                word = node[1] + [0]
                T_kp1.append( [num_nodes, word, []] )
                node[2].append(num_nodes)
            else:node[2].append('X')

            if transition_non_normalized_probabilities[1]/sum(transition_non_normalized_probabilities) >= cut_off:
                num_nodes = num_nodes + 1
                word = node[1] + [1]
                T_kp1.append( [num_nodes, word, []] )
                node[2].append(num_nodes)
            else: node[2].append('X')

                        
        if k != D - 2: T.append(T_kp1)
  
        #print 'T_k =', T[k]
        #print 'T_kp1 =', T_kp1
        #print ''


    #convert tree T to a new tree-graph TT which is simply a list of the nodes and their connections
    TT = []
    for level in xrange(D-1):
        #print ''
        #print 'level =', level
        for node in T[level]:
            #print 'node =', node
            TT.append(node)

    #for tt in TT:
    #    print tt
    #    print ''
    #print 'data =', data

    print 'done Tree construction'
    

    #loop looking at depth L trees (machines with max 2^L states) until the target machine is reached. (number of states does not increase as L increases).
    old_machine_size = 0
    equality_count = 1
    L = min_L - 1
    if max_L > (D-1)/2: max_L = (D-1)/2 
    Error = 0
    
    while equality_count < num_times_equal_machine_required:
        
        # construct the list of L level subtrees of the orginal tree. Each of these is given as a tree-graph, which is simply a list of the nodes and their connections
        L = L + 1
        print ''
        print 'L =', L
        List_of_L_Subtrees = []
    
        #compute max node up to depth L in tree TT
        max_node = 0
        for i in T[L]:# NOTE - we assume here that all level L subtrees have roots in first L levels of main tree T. (I believe this is ok for a processs graph DFA domain)
            if i[0] > max_node: max_node = i[0]
        print 'max_node =', max_node
    
    
        for node in xrange(max_node + 1):                  #(num_data_points/4): #xrange(len(TT)/100):
            #print ''
            #print 'node =', node
        

            STL = [ [[0, node],['temp','temp']] ]
            num_STL_nodes = 0
            node_names = set(['X',node])
            #node_names = set(['X',0])
   

            for k in xrange(L):
                new_STL_nodes = []
            
                for sub_tree_node in STL:
                    for symbol in xrange(num_symbols):
                    
                        node_number = sub_tree_node[0][1]
                        #print 'TT[node_number] =', TT[node_number]
                        IS_transition = TT[node_number][2][symbol]
                        #print 'IS_transition =', IS_transition
            
                        if IS_transition not in node_names:
                            node_names = node_names | set([IS_transition])
                            num_STL_nodes = num_STL_nodes + 1
                            new_STL_node = [[num_STL_nodes, IS_transition],['temp','temp']]
                            #print 'new_STL_node =', new_STL_node
                            new_STL_nodes.append(new_STL_node)
                            sub_tree_node[1][symbol] = num_STL_nodes
                        
                        if IS_transition == 'X': sub_tree_node[1][symbol] = 'X'

                STL = STL + new_STL_nodes
                #print 'new_STL_nodes =', new_STL_nodes

            for sub_tree_node in STL:
                sub_tree_node[0] = sub_tree_node[0][0]

            #print 'STL =', STL
            List_of_L_Subtrees.append(STL)
        

        List_of_L_Subtrees = remove_redundancies_from_list(List_of_L_Subtrees)
        #print ''
        #print 'List_of_L_Subtrees'
        #for L_Subtree in List_of_L_Subtrees:
            #print L_Subtree
            #print ''
        new_machine_size = len(List_of_L_Subtrees)    
        print 'number of L_Subtrees =', new_machine_size

        if new_machine_size == old_machine_size: equality_count = equality_count + 1
        if new_machine_size != old_machine_size: equality_count = 1
        old_machine_size = new_machine_size

        if L >= max_L:
            Error = 1
            print ''
            print 'ERROR !!!!'
            break

    num_machine_states = new_machine_size

    #once we have the list of L level subtrees we classify each of the nodes in the upper part of the tree ("solid domain region") by their L_subtree

    if Error == 0:
        
        in_solid_domain = 1
        node = -1
        nodelabels = []
    
        while in_solid_domain == 1:
        
            node = node + 1
            STL = [ [[0, node],['temp','temp']] ]
            num_STL_nodes = 0
            node_names = set(['X',node])
   

            for k in xrange(L):
                new_STL_nodes = []
            
                for sub_tree_node in STL:
                    for symbol in xrange(num_symbols):
                    
                        node_number = sub_tree_node[0][1]
                        #print 'TT[node_number] =', TT[node_number]
                        IS_transition = TT[node_number][2][symbol]
                        #print 'IS_transition =', IS_transition
            
                        if IS_transition not in node_names:
                            node_names = node_names | set([IS_transition])
                            num_STL_nodes = num_STL_nodes + 1
                            new_STL_node = [[num_STL_nodes, IS_transition],['temp','temp']]
                            #print 'new_STL_node =', new_STL_node
                            new_STL_nodes.append(new_STL_node)
                            sub_tree_node[1][symbol] = num_STL_nodes
                        
                        if IS_transition == 'X': sub_tree_node[1][symbol] = 'X'

                STL = STL + new_STL_nodes
                #print 'new_STL_nodes =', new_STL_nodes

            for sub_tree_node in STL:
                sub_tree_node[0] = sub_tree_node[0][0]

            #print 'STL =', STL
         
            nodelabel = 'XX'
            for i in xrange(len(List_of_L_Subtrees)):
                if STL == List_of_L_Subtrees[i]:
                    nodelabel = i
                    nodelabels.append(nodelabel)
                
            if nodelabel == 'XX': in_solid_domain = 0
            if node >= len(TT)/2: in_solid_domain = 0

        num_solid_domain_nodes = node
        #print 'num_solid_domain_nodes =', num_solid_domain_nodes

        #once we have individiual nodes classified we construct the corresponding DFA from their connections
    
        #first consturct a hidden markov chain transition matrix representation for the DFA
        MM_HMC = zeros((num_symbols, num_machine_states, num_machine_states), float)
        for s in xrange(num_symbols):
            for x in xrange(num_solid_domain_nodes):
            
                i = nodelabels[x]
                j_node = TT[x][2][s]
                if j_node != 'X'and j_node < num_solid_domain_nodes:
                    j = nodelabels[j_node]
                    MM_HMC[s][i][j] = 1

        #next convert this to a list of states form of DFA
        MM = []
        for x in xrange(num_machine_states):
            MM.append([1,['X','X']])

        for s in xrange(num_symbols):
            for x in xrange(num_machine_states):
                for y in xrange(num_machine_states):

                    if MM_HMC[s][x][y] == 1:MM[x][1][s] = y

        #finally add in a 'junk state' everything transitions to on 'X' (when it wouldn't have anywhere to go)
        MM.append([0,[num_machine_states,num_machine_states]])
        for state in xrange(len(MM)):
            for symbol in xrange(num_symbols):
                if MM[state][1][symbol] == 'X':MM[state][1][symbol] = num_machine_states


    if Error == 1:
        MM = 'no machine'
        MM_HMC ='no machine'


    return Error, MM, MM_HMC


# (9) define construct_ECA_domain_machines(filename)
# This function loops through all ECAs 1,2,... 256 attempts to find their domain machines and writes the results to a textfile - filename

def construct_ECA_domain_machines(filename):  
    file = open(filename, 'w')

    paramater_list = [0.2, 0.3, 0.1, 0.4] # values of parameter cut_off
    Range = 1
    num_symbols = 2
    num_iterations = 1000
    num_cells = 1000
    D = 30
    max_num_nodes_in_tree = 10000
    max_L = 10
    min_L = 3
    num_times_equal_machine_required = 2

    for rule_number in range(1,257):
        
        print 'rule_number =', rule_number 
        go = 1
        Error = 0
        parameter_setting = -1
    
        while go == 1:
        
            parameter_setting = parameter_setting + 1
            print 'parameter_setting =', parameter_setting
            cut_off = paramater_list[parameter_setting]
        
            if parameter_setting == 0:
                data = generate_CA_data(Range, rule_number, num_iterations, num_cells)
                print 'done 1000 cell data generation'
                    
            Error, MM, MM_HMC = construct_epsilon_machine_ECA(data, num_symbols, cut_off, max_L, min_L, num_times_equal_machine_required, D, max_num_nodes_in_tree)
            print 'Error =', Error
            print 'MM =', MM
            #print 'MM_HMC =', MM_HMC
            #print ''

            if Error == 0:
                go = 0
                MM = str(MM)
                file.write(MM)
                file.write('\n')

            if (Error == 1) and (parameter_setting == 3):
                go = 0
                MM = str(MM)
                file.write(MM)
                file.write('\n')


    file.close()


# (10) define convert_ECA_domain_machines_to_process_graphs(filename, filename_out)
# This function extracts the process graphs for the various domains (1 or more) for each reconstructed epsilon machine in the textfile and writes them to a new textfile - filename_out

def convert_ECA_domain_machines_to_process_graphs(filename, filename_out):
    print 'filename =', filename
    print 'filename_out =', filename_out

#    #read in the data from text file and store the list of machines in memory as a list machine_list
    machine_list = []
    TextFile = open(filename)
    for line in TextFile:
        l = line[:-1]
        #print 'l =', l

        M = []
        length = len(l)
        next_state = []
        next_number = ''

        for x in xrange(length):
            s = l[x]
        
            if (s != '[') and (s != ']') and (s != ',') and (s != ' '):
                next_number = next_number + s

            if (s == '[') or (s == ']') or (s == ',') or (s == ' '):
                if next_number != '':
                    next_state.append(next_number)
                    next_number = ''

                    if len(next_state) == 3:
                        accept = int(next_state[0])
                        IS0 = int(next_state[1])
                        IS1 = int(next_state[2])
                        M.append([accept,[IS0,IS1]])
                        next_state = []

        #print 'M =', M
        machine_list.append(M)
    
        #print 'M[0] =', M[0]
        #print 'M[1] =', M[1]
        #print 'M[0][1][0]*5 =', M[0][1][0]*5
        #print ''

    print ''    
    print 'machine_list'
    for MM in machine_list:
        print MM

    #file.close(TextFile)  #for some reason won't let me close TextFile here. It gives errors.

    #determine process graphs for individual domains
    file = open(filename_out, 'w')
    rule_number = 0

    for x in xrange(len(machine_list)):

        rule_number = rule_number + 1
        file.write(str(rule_number))
        file.write('\n')
    
        M = machine_list[x]
        if M == []:
            file.write(str([]))
            file.write('\n')

        if M != []:
            process_graphs = construct_process_graphs(M)

            for pg in process_graphs:
                file.write(str(pg))
                file.write('\n')

    file.close()


# (11) define extract_process_graphs(filename). Takes a textfile of ECA process graphs and extracts them into an array for use in the program
def extract_process_graphs(filename):

    all_rules_process_graphs_list = []
    rule_n_process_graphs_list = []
    TextFile = open(filename)

    for line in TextFile:
        l = line[:-1]
        #print 'l =', l
        
        if (len(l) < 4) and (l != '1') and (l!= '[]'):
            all_rules_process_graphs_list.append(rule_n_process_graphs_list)
            rule_n_process_graphs_list = []
            
        if len(l) > 4:
            PG = []
            length = len(l)
            next_state = []
            next_number = ''

            for x in xrange(length):
                s = l[x]
        
                if (s != '[') and (s != ']') and (s != ',') and (s != ' '):
                    next_number = next_number + s

                if (s == '[') or (s == ']') or (s == ',') or (s == ' '):
                    if next_number != '':
                        next_state.append(next_number)
                        next_number = ''

                        if len(next_state) == 2:
                            IS0 = next_state[0]
                            #print 'IS0 =', IS0
                            IS1 = next_state[1]
                            #print 'IS1 =', IS1

                            if IS0 == "'X'":IS0 = 'X'
                            if IS1 == "'X'":IS1 = 'X'
                            if IS0 != 'X':IS0 = int(IS0)
                            if IS1 != 'X':IS1 = int(IS1)
                            #print 'next_state', [IS0, IS1]
                            #print ''
                            PG.append([IS0,IS1])
                            next_state = []

            #print 'PG =', PG
            rule_n_process_graphs_list.append(PG)
            #print 'rule_n_process_graphs_list = ', rule_n_process_graphs_list
            #print 'next process graph'

    all_rules_process_graphs_list.append(rule_n_process_graphs_list) #the last one
    
    print ''    
    print 'process_graphs_list'
    for x in xrange(len(all_rules_process_graphs_list)):
        print x+1
        #print all_rules_process_graphs_list[x]
        for y in all_rules_process_graphs_list[x]:
            print y
        print ''

    file.close(TextFile)
    return all_rules_process_graphs_list


# (12) define test_process_graphs_as_domains
# this function converts the process graphs for each rule number back into single domain DFAs and 'iterates the machines' under the ECA rule
# by composing with the update transducer to determine whether the proposed domain machines are in fact periodic domains for some p = 1,2,...

def test_process_graphs_as_domains(filename):
    Process_Graphs = extract_process_graphs(filename)
    
    for rule_number in range(1,257):
        print ''
        print ''
        print 'rule_number =', rule_number
        T = construct_update_transducer(rule_number)
        print 'T =', T
        print ''
        
        for Process_Graph in Process_Graphs[rule_number - 1]:
            print 'Process_Graph =', Process_Graph
            M = convert_process_graph_to_DFA(Process_Graph)
            MM = [M,M,M,M,M,M]
            #for x in MM:
            #    print x
            M_k = [M]
            #print 'M =', M
            #print 'M_k =', M_k
            
            for count in range(1,6):
                
                TT, TTT = compose_Transducer_and_DFA(T,M_k[count - 1])
                DFA = construct_T_out(TTT, 2, 3) #2 input symbols {0,1}, 3 output symbols {0,1,2} where 2 = lambda = null symbol
                DFA_min = minimize_DFA(DFA)
                #print 'DFA =', DFA
                #print 'DFA_min =', DFA_min
                M_k.append(DFA_min)

            print 'M_k[1] =', M_k[1]
            #print 'M_k ='
            #for x in M_k:
            #    print x
                
            if len(MM[1]) == len(M_k[1]):
                equivalent1 = check_equivalence_of_DFAs(MM[1],M_k[1])
                #print 'equivalent1 =', equivalent1
                
            if len(MM[2]) == len(M_k[2]):
                equivalent2 = check_equivalence_of_DFAs(MM[2],M_k[2])
                #print 'equivalent2 =', equivalent2 
            
            if len(MM[3]) == len(M_k[3]):
                equivalent3 = check_equivalence_of_DFAs(MM[3],M_k[3])
                #print 'equivalent3 =', equivalent3
                
            if len(MM[4]) == len(M_k[4]):
                equivalent4 = check_equivalence_of_DFAs(MM[4],M_k[4])
                #print 'equivalent4 =', equivalent4
                
            if len(MM[5]) == len(M_k[5]):
                equivalent5 = check_equivalence_of_DFAs(MM[5],M_k[5])
                #print 'equivalent5 =', equivalent5

            if len(MM[1]) != len(M_k[1]): equivalent1 = 0
            if len(MM[2]) != len(M_k[2]): equivalent2 = 0
            if len(MM[3]) != len(M_k[3]): equivalent3 = 0
            if len(MM[4]) != len(M_k[4]): equivalent4 = 0
            if len(MM[5]) != len(M_k[5]): equivalent5 = 0

            print 'equivalent0 =', 1
            print 'equivalent1 =', equivalent1
            print 'equivalent2 =', equivalent2
            print 'equivalent3 =', equivalent3
            print 'equivalent4 =', equivalent4
            print 'equivalent5 =', equivalent5
            
            print ''


########## MAIN PROGRAM TO TEST FUNCTIONS ###########

# (1) test convert_DFA_to_hidden_markov_chain(M):
#M = [   [1,[0,1]], [1,[2,3]], [1,[1,1]], [0,[3,3]]  ] # DFA for the rule 18 domain
#M = [   [1,[1,2]], [1,[7,4]], [1,[1,3]], [1,[1,6]], [1,[7,5]], [1,[7,6]], [1,[1,7]], [0,[7,7]]     ]   # ECA 54 Domain {1110...}
#M = [   [1,[2,1]], [1,[4,7]], [1,[3,1]], [1,[6,1]], [1,[5,7]], [1,[6,7]], [1,[7,1]], [0,[7,7]]     ]   # ECA 54 Domain {0001...}
#M = [   [1,[1,2,3]], [1,[1,2,5]], [1,[0,0,4]], [1,[3,1,5]], [1,[3,5,5]], [0,[5,5,5]]   ]

#HMC = convert_DFA_to_hidden_markov_chain(M)
#print HMC


# (2) test generate_data(HMC,num_data_points):
#M = [   [1,[0,1]], [1,[2,3]], [1,[1,1]], [0,[3,3]]  ] # DFA for the rule 18 domain
#M = [   [1,[1,2]], [1,[7,4]], [1,[1,3]], [1,[1,6]], [1,[7,5]], [1,[7,6]], [1,[1,7]], [0,[7,7]]     ]   # ECA 54 Domain {1110...}
#M = [   [1,[2,1]], [1,[4,7]], [1,[3,1]], [1,[6,1]], [1,[5,7]], [1,[6,7]], [1,[7,1]], [0,[7,7]]     ]   # ECA 54 Domain {0001...}
#M = [   [1,[1,2,3]], [1,[1,2,5]], [1,[0,0,4]], [1,[3,1,5]], [1,[3,5,5]], [0,[5,5,5]]   ]

#HMC = convert_DFA_to_hidden_markov_chain(M)
#print HMC
#data = generate_data(HMC,25)
#print data


# (3) test generate_CA_data(Range, rule_number, num_iterations, num_cells)
#Range = 1
#rule_number = 18
#num_iterations = 100
#num_cells = 25
#final_state = generate_CA_data(Range, rule_number, num_iterations, num_cells)
#print 'final_state =', final_state


# (4) test plot_sequence_data(sequence_data)

#M = [   [1,[0,1]], [1,[2,3]], [1,[1,1]], [0,[3,3]]  ] # DFA for the rule 18 domain
#M = [   [1,[1,2]], [1,[7,4]], [1,[1,3]], [1,[1,6]], [1,[7,5]], [1,[7,6]], [1,[1,7]], [0,[7,7]]     ]   # ECA 54 Domain {1110...}
#M = [   [1,[2,1]], [1,[4,7]], [1,[3,1]], [1,[6,1]], [1,[5,7]], [1,[6,7]], [1,[7,1]], [0,[7,7]]     ]   # ECA 54 Domain {0001...}

#HMC = convert_DFA_to_hidden_markov_chain(M)
#print HMC
#data = generate_data(HMC,25)
#print data
#plot_sequence_data(data)


# (5) test plot_sequence_data_machines(machines)
#num_data_points = 100
#machines = []
#machines.append( [   [1,[0,1]], [1,[2,3]], [1,[1,1]], [0,[3,3]]  ] )                                               # DFA for the rule 18 domain
#machines.append( [   [1,[1,2]], [1,[7,4]], [1,[1,3]], [1,[1,6]], [1,[7,5]], [1,[7,6]], [1,[1,7]], [0,[7,7]]  ] )   # ECA 54 Domain {1110...}
#machines.append( [   [1,[2,1]], [1,[4,7]], [1,[3,1]], [1,[6,1]], [1,[5,7]], [1,[6,7]], [1,[7,1]], [0,[7,7]]  ] )   # ECA 54 Domain {0001...}
#plot_sequence_data_machines(machines,num_data_points)


# (6) test remove_redundancies_from_list(L):
#L = [1,2,3,2,4,4,7,1,0,2,9]
#L = ['cat','bear','bear','dog','elephant','cat','dog']
#L = [ [1,2,3] , [4,5], [6,2,3], [4,5], [2,3,7], [1,2,3], [1,2,3], [4,5]]
#L = [1,2,3]
#L = ['cat','cat','cat']
#L = [1, 'dog', [4,5,6],'dog']

#LL = remove_redundancies_from_list(L)
#print LL

            
# (7) test construct_epsilon_machince(data,num_symbols)

#completely periodic data 
#data = [1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1] #period 5 - 9 states + junk_state
#data = [0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1]                                           #period 4 - 7 states + junk_state
#data = [1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,0,1,1]                                       #period 4 - 7 states + junk_state
#data = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]                                                                               #period 2 - 3 states + junk_state

#completely random data
#data = []
#for n in xrange(2000):
#    data.append(randint(0,2))

#machine generated data, some non-periodic (probabilistic transitions)
#M = [   [1,[0,1]], [1,[2,3]], [1,[1,1]], [0,[3,3]]  ]                                                  # DFA for the rule 18 domain
#M = [   [1,[1,2]], [1,[7,4]], [1,[1,3]], [1,[1,6]], [1,[7,5]], [1,[7,6]], [1,[1,7]], [0,[7,7]]     ]   # ECA 54 Domain {1110...}
#M = [   [1,[2,1]], [1,[4,7]], [1,[3,1]], [1,[6,1]], [1,[5,7]], [1,[6,7]], [1,[7,1]], [0,[7,7]]     ]   # ECA 54 Domain {0001...}

#M = [   [1,[1,2]], [1,[3,6]], [1,[4,5]], [1,[7,5]], [1,[3,3]], [1,[4,7]], [1,[4,5]], [0,[7,7]]  ]      # Other machine I made with domain 0,W,1,0,W,1,0,W,1 ... where W = wildcard
#M = [[1, [1, 2]], [1, [3, 2]], [1, [4, 5]], [1, [6, 5]], [1, [3, 3]], [1, [4, 6]], [0, [6, 6]]]        # the output copy from the 0,W,1 domain machine

# Machine I made with domain 0,W,1,W,0,W,1,W,0,W,1,W...
#M = [[1, [1, 2]], [1, [3, 4]], [1, [5, 6]], [1, [7, 4]], [1, [8, 6]], [1, [3, 7]], [1, [5, 8]], [1, [11, 10]], [1, [9, 11]], [1, [7, 7]], [1, [8, 8]],  [0, [11, 11]]]             

# Random-Random-zero process
#M = [[1, [0, 1]], [1, [2, 3]], [1, [4, 5]], [1, [6, 7]], [1, [1, 1]], [1, [3, 3]], [1, [5, 5]], [0, [7, 7]]]


#HMC = convert_DFA_to_hidden_markov_chain(M)
#data = generate_data(HMC,1000)

#num_symbols = 2
#MM, MM_HMC = construct_epsilon_machince(data,num_symbols)
#print''
#print 'MM_HMC =', MM_HMC
#print ''
#print 'MM =', MM
#print ''
#print 'M =', M

#equivalent = check_equivalence_of_DFAs(M,MM)
#print ''
#print 'equivalent =', equivalent 


# (8) test construct_epsilon_machine_ECA(data, num_symbols, cut_off, max_L, min_L, num_times_equal_machine_required, D, max_num_nodes_in_tree)
#Range = 1
#rule_number = 54
#num_iterations = 1000
#num_cells = 1000
#data = generate_CA_data(Range, rule_number, num_iterations, num_cells)
#print 'done data generation'
#print 'data =', data
#num_symbols = 2
#cut_off = 0.4
#D = 30
#max_num_nodes_in_tree = 10000
#max_L = 10
#min_L = 3
#num_times_equal_machine_required = 2


#Error, MM, MM_HMC = construct_epsilon_machine_ECA(data, num_symbols, cut_off, max_L, min_L, num_times_equal_machine_required, D, max_num_nodes_in_tree) 
#print 'Error =', Error

# DFA for the rule 18 domain 
#M = [   [1,[0,1]], [1,[2,3]], [1,[1,1]], [0,[3,3]]  ]

#DFA for rule 54 (both domains)
#M = [   [1,[1,2]], [1,[3,4]], [1,[5,6]], [1,[10,7]], [1,[8,13]], [1,[9,12]], [1,[11,14]], [1,[8,15]], [1,[9,15]], [1,[10,15]], [1,[15,7]], [1,[15,12]], [1,[15,13]], [1,[15,14]], [1,[11,15]], [0,[15,15]]     ]

#if Error == 0:
#    equivalent = check_equivalence_of_DFAs(M,MM)
#    print ''
#    print 'equivalent =', equivalent 
    

# (9), (10), (11), (12) Use the Main Functions to do ECA Domain Reconstruction for all rule numbers

#construct_ECA_domain_machines('ECA_Domain_Machines_file')                                                                  # (9)
#convert_ECA_domain_machines_to_process_graphs('ECA_Domain_Machines_file_copy.txt', 'ECA_Process_Graphs_file')              # (10)
#extract_process_graphs('ECA_Process_Graphs_file_copy')                                                                     # (11)
#test_process_graphs_as_domains('ECA_Process_Graphs_file_copy')                                                             # (12)