# Two Agents Dynamics for Rock-Paper-Scissors game
# Main equations:   dotxi = xi*[beta*(Ri-R)+alpha*(Hi-H)]
#                   dotyi = yi*[beta*(Ri-R)+alpha*(Hi-H)]
#
# Ri is the reward for each action i and is represented by a 3x3 2-D matrix
#           There should be two matrices, but since the # of actions for
#           both agents is the same, we can use the same matrix.
# R is the average found with a double sum of the product of R[n][m]*xn*ym
# Hi is the self-information of each action i, H is the average
#
# Alpha controls memory where   alpha>0 -> memory loss
#                               alpha=0 -> perfect memory

from visual import *
from Numeric import *
from visual.controls import *

print """
Right button drag to rotate camera to view scene.
Middle button drag up or down to zoom in or out

Button Controls

"Go/Stop": Press to start simulation.  Press again to stop/start simulation.
"Clear": Press to clear old trajectories from the screen
"Reset": Press after selecting a new initial condition from the menu.

The Alpha scale changes the memory loss rate.
The Beta scale changes the reation to current rewards.
"""
#----------------------------------------------------------------------
# Initialize the simulation

# parameters
ex = 0.5
ey = -0.3
betaX = 1.0
betaY = 1.0
alphaX = 0.0198
alphaY = 0.01
dt = 0.1

rateVal = 300

scene.title = "Two agents playing Rock-Papers-Scissors"
scene.center = vector(0,0.5,1)
scene.background = (1,1,1)

# Draw the state space for x.  It is a transform of the original state space
# by subtracting 1 to x coordinate and adding 1 from z coordinate
curve( pos = [ (0,0,1), (-1,0,2) ], color = (1,0,1), radius = 0.009 )
curve( pos = [ (0,0,1), (-1,1,1) ], color = (1,0,1), radius = 0.009 )
curve( pos = [ (-1,0,2), (-1,1,1) ], color = (1,0,1), radius = 0.009 )

# Draw the state space for y
curve( pos = [ (1,0,0), (0,0,1) ], color = (0,0,1), radius = 0.009 )
curve( pos = [ (0,1,0), (0,0,1) ], color = (0,0,1), radius = 0.009 )
curve( pos = [ (1,0,0), (0,1,0) ], color = (0,0,1), radius = 0.009 )

# Create controls window for buttons and sliders
c = controls(title="Two-Agent RPS", width=400, height=450, range=220)

# Create a button in the controls window
# bGo: start/stop the simulation
# bClear: clear older trajectories from the screen
# bReset: Restart the entire simulation (with new initial conditions as an option)
bGo = button( pos=(-160,150), width=60, height=35, text='Go', action=lambda: change() )
bClear = button( pos=(-80,150), width=60, height=35, text='Clear', action=lambda: clear() )
bReset = button( pos=(0,150), width=60, height=35, text='Reset', action=lambda: reset() )
#bLeft = button(pos = (-160,70), width = 20, height =20, text = '<', action = lambda: left(1))

# Create sliders for constants alpha and beta
sAlphaX = slider(pos=(-140,70), min=0.0, max=1.0, axis=(80,0))
sAlphaX.value = alphaX
lAlphaX = label(pos=(-80,90), text="AlphaX = %3.3f" % sAlphaX.value, opacity=0, box=0, line=0, display=c.display)
sBetaX = slider(pos=(-140,20), min=0.0, max=10.0, axis=(80,0))
sBetaX.value = betaX
lBetaX = label(pos=(-80,40), text="BetaX = %3.3f" % sBetaX.value, opacity=0, box=0, line=0, display=c.display)

sAlphaY = slider(pos=(-140,-30), min=0.0, max=1.0, axis=(80,0))
sAlphaY.value = alphaY
lAlphaY = label(pos=(-80,-10), text="AlphaY= %3.3f" % sAlphaY.value, opacity=0, box=0, line=0, display=c.display)
sBetaY = slider(pos=(-140,-80), min=0.0, max=10.0, axis=(80,0))
sBetaY.value = betaY
lBetaY = label(pos=(-80,-60), text="BetaY = %3.3f" % sBetaY.value, opacity=0, box=0, line=0, display=c.display)

# Menu to change the rate of the simulation
mRate = menu(pos = (-160,-120), text = "Rate", width = 60, height = 20)
mRate.items.append(("100", lambda: setRate(100)))
mRate.items.append(("200", lambda: setRate(200)))
mRate.items.append(("400", lambda: setRate(400)))
mRate.items.append(("600", lambda: setRate(600)))

# Menu to change the initial condition
m = menu(pos=(120,150), text="IC", width=60, height=20)
# label for the menu so we know which IC we are looking at
lmenu = label(pos = (120,180), text = "IC1",opacity = 0, box = 0, line = 0, display = c.display)
m.items.append(("IC1", lambda: setIC(1)))
m.items.append(("IC2", lambda: setIC(2)))
m.items.append(("IC3", lambda: setIC(3)))
m.items.append(("IC4", lambda: setIC(4)))
m.items.append(("IC5", lambda: setIC(5)))
m.items.append(("IC6", lambda: setIC(6)))
m.items.append(("IC7", lambda: setIC(7)))
m.items.append(("IC8", lambda: setIC(8)))
m.items.append(("IC9", lambda: setIC(9)))
m.items.append(("IC10", lambda: setIC(10)))
m.items.append(("IC11", lambda: setIC(11)))
m.items.append(("IC12", lambda: setIC(12)))
#-------------------------------------------------------------------
# Moves the vector for x so that its trajectory is next to y's
def transX(x):
    temp = array( [x[0]-1, x[1], x[2]+1] )
    return temp

# Creates a ODE of the form of udot
# udot = -beta*(R-aveR) - alpha*u
# Equation 33
class uODE:
    def __init__(self,beta,alpha,R1,R2,R3):
        self.beta = beta
        self.alpha = alpha
        self.R1 = R1
        self.R2 = R2
        self.R3 = R3
    def xdot(self,x,y,z):
        return -1*(self.beta*(self.R1) + self.alpha*x)
    def ydot(self,x,y,z):
        return -1*(self.beta*(self.R2) + self.alpha*y)
    def zdot(self,x,y,z):
        return -1*(self.beta*(self.R3) + self.alpha*z)

# Fourth order Runge-Kutta
# This version returns a vector with 3 components
def RK4_3D(f,dt,x,y,z):
    k1x = dt * f.xdot(x,y,z)
    k1y = dt * f.ydot(x,y,z)
    k1z = dt * f.zdot(x,y,z)
    
    k2x = dt * f.xdot(x + k1x/2.0, y + k1y/2.0, z + k1z/2.0)
    k2y = dt * f.ydot(x + k1x/2.0, y + k1y/2.0, z + k1z/2.0)
    k2z = dt * f.zdot(x + k1x/2.0, y + k1y/2.0, z + k1z/2.0)

    k3x = dt * f.xdot(x + k2x/2.0, y + k2y/2.0, z + k2z/2.0)
    k3y = dt * f.ydot(x + k2x/2.0, y + k2y/2.0, z + k2z/2.0)
    k3z = dt * f.zdot(x + k2x/2.0, y + k2y/2.0, z + k2z/2.0)

    k4x = dt * f.xdot(x + k3x, y + k3y, z + k3z)
    k4y = dt * f.ydot(x + k3x, y + k3y, z + k3z)
    k4z = dt * f.zdot(x + k3x, y + k3y, z + k3z)

    x = x + ( k1x + 2.0*k2x + 2.0*k3x + k4x )/6.0
    y = y + ( k1y + 2.0*k2y + 2.0*k3y + k4y )/6.0
    z = z + ( k1z + 2.0*k2z + 2.0*k3z + k4z )/6.0
    
    temp = array([x,y,z])
    return temp

def CenterMe():
    # Re-center the scene when the left mouse button is clicked
    if scene.mouse.clicked:
        m = scene.mouse.getclick()
        loc = m.pos
        scene.center = vector(loc)    

# Controls whether or not the simulation stops or goes
# If go is false, then we change go to true to start integration
# Vice versa for when go is true
def change(): 
	global go
	if go == true:
		go = false
		bGo.text="Go"
	else:
		go = true
		bGo.text="Stop"

# Clear curves for agents, keep last curve for display
# If index is greater than zero, we have more than one curve
# This is done for each agent X and Y:
#   1. create temp array to hold last curve
#   2. make all other curves invisible
#   3. delete the old array
#   4. Set Agent array to last element so last element is now
#       the first element
#   5. Set index equal to 0 b/c only one curve in array 
def clear():
    global XAgent, YAgent, index
    
    if index > 0:
        tempXAgent = []                 # create temp array
        tempXAgent.append(XAgent[-1])
        for i in range(index):          # make invisible
            XAgent[i].visible = 0
        del XAgent[:]                   # delete all curves
        XAgent = tempXAgent             # set to last element

        tempYAgent = []
        tempYAgent.append(YAgent[-1])
        for i in range(index):
            YAgent[i].visible = 0
        del YAgent[:]
        YAgent = tempYAgent
        index = 0

# Reset simulation
# If new initial condition has been specified, start
#       at the new condition
def reset():
	global u, v, x, y, Xinitpos, Xcurrpos, Yinitpos, Ycurrpos
	global X, Y, ICindex, ICarray, index, startup, go

	# If simulation running, want to stop it
	if go:
		change()
	
	#create empty curves
	XAgent.append(curve( color = (1,0,0), radius=0.01 ))
	YAgent.append(curve( color = (0,1,1), radius=0.01 ))

	# If we are just starting, there is no curve in the
	#   array so we don't need to use clear function
	# Else, clear all the old trajectories
	if startup:
		startup = false
	else:
		index +=1   # added the empty curve above
		clear()
		setRate(200)

	# set x and y to the new initial conditions
	x = ICarray[ICindex][X]
	y = ICarray[ICindex][Y]
	xtem = transX(x)
	
	XAgent[0].append(pos=xtem)      # append points to the curve
	YAgent[0].append(pos=y)

	# initpos is a sphere indicating the initial position for X and then Y
	# currpos is a sphere indicating the current position for X and then Y
	Xinitpos.pos = xtem
	Xcurrpos.pos = xtem
	Yinitpos.pos = y
	Ycurrpos.pos = y

	# Start transformation from x and y to u and v
	# Initialize H, the self information/surprise for each action
	# Check first if initial condition is too close to 0
	for t in range(3):
	    if x[t] < 1e-15:
	        x[t] = 1e-15
	    if y[t] < 1e-15:
	        y[t] = 1e-15
	Hx = -log(x)
	Hy = -log(y)

	# Transform xi's and yi's to find u and v
	sumHx = dot(Hx,array([1,1,1]))     # Get sum of the entropies
	sumHy = dot(Hy,array([1,1,1]))
	u = Hx - (1.0/3.0)*sumHx
	v = Hy - (1.0/3.0)*sumHy

# Selects a new initial condition based on what user selected
#       from the drop down menu
def setIC(ICval):
	global ICindex
	ICindex = ICval - 1
	lmenu.text = "IC%d" %ICval

# Sets the rate at which the simulation runs
def setRate(rVal):
    global rateVal
    rateVal = rVal

def left(val):
    global alphaX
    if sAlphaX.value >=0.001:
        sAlphaX.value -=0.001
    
#-------------------------------------------------------------------
# Initialize the probabilities, their sum should equal 1
# X and Y are indices for the x and y arrays in the
#   ICarray
X = 0
Y = 1
ICarray = []
#ICarray.append( array( [array([0.3,0.3,0.4]), array([0.3,0.6,0.1])] ) )        
ICarray.append( array( [array([0.5,0.3,0.2]), array([0.1,0.5,0.4])] ) )     
ICarray.append( array( [array([0.4,0.3,0.3]), array([0.3,0.3,0.4])] ) )     
ICarray.append( array( [array([0.5,0.3,0.2]), array([0.5,0.2,0.3])] ) )     
ICarray.append( array( [array([0.26,0.113333,0.626667]), array([0.165,0.772549,0.062451])] ) )  
#ICarray.append( array( [array([0.6,0.1,0.3]), array([0.2,0.3,0.5])] ) )     
ICarray.append( array( [array([0.05,0.35,0.6]), array([0.1,0.2,0.7])] ) )   
ICarray.append( array( [array([0.5,0.3,0.2]), array([0.0,0.3,0.7])] ) )
ICarray.append( array( [array([0.5,0.5,0.0]), array([0.0,0.3,0.7])] ) )     
#ICarray.append( array( [array([1.0,0.0,0.0]), array([0.0,0.3,0.7])] ) )     
ICarray.append( array( [array([1.0,0.0,0.0]), array([0.0,0.0,1.0])] ) )     
ICarray.append( array( [array([1.0,0.0,0.0]), array([1.0,0.0,0.0])] ) )     
ICarray.append( array( [array([0.2,0.3,0.5]), array([0.2,0.3,0.5])] ) )
ICarray.append( array( [array([0.4,0.1,0.5]), array([0.2,0.3,0.5])] ) )  
ICarray.append( array( [array([0.3,0.4,0.3]), array([0.3,0.4,0.3])] ) )

# Initialize some variables
startup = true      # indicates if we have just started the program or not
index = 0           # counts the number of curves that have more than 500 points
ICindex = 0         # index for the initial condition array
pointcount = 0      # counts the number of points on a curve
go = false          # allows or prevents integration from occuring

# Fill these arrays with dummy values
x = array([0.,0.,0.])
y = array([0.,0.,0.])

# The curve of the agent's action probabilities
XAgent = []
YAgent = []

#initpos is a sphere indicating the initial position for X and then Y
#currpos is a sphere indicating the current position for X and then Y
Xinitpos = sphere(color = (1,0,1), radius=0.03)
Xcurrpos = sphere(color = (1,0,0), radius=0.04)
Yinitpos = sphere(color = (1,0,1), radius=0.03)
Ycurrpos = sphere(color = (0,1,1), radius=0.04)

# Use reset to set the initial conditions
reset()

# Rewards Matrices
# The matrices are basically the same but we can vary the
#       epsilons to make the rewards different for the agents
A = array( [ [2/3*ex, -1 - 1/3*ex, 1 - 1/3*ex],
      [1 - 1/3*ex,2/3*ex,-1 - 1/3*ex],
      [-1 - 1/3*ex,1 - 1/3*ex,2/3*ex] ])

B = array( [ [2/3*ey, -1 - 1/3*ey, 1 - 1/3*ey],
      [1 - 1/3*ey,2/3*ey,-1 - 1/3*ey],
      [-1 - 1/3*ey,1 - 1/3*ey,2/3*ey] ])


#-------------------------------------------------------------------
# 
while true:
    CenterMe()      # Re-center the simulation with mouse
    while(go):
        # Find each dot product of the reward with x and y 
        rx = dot(A,y)       # Ay
        ry = dot(B,x)       # Bx

        Rx = dot(rx,array([1,1,1]))     # sum elements together
        Ry = dot(ry,array([1,1,1]))     # sum elements together
                 
        # Find the difference of the rewards with the average
        # From Equation 32
        tempRx = rx - Rx        # Ay - sum(Ay)
        tempRy = ry - Ry        # Bx - sum(Bx)

        # Make ODE for udot and vdot
        f = uODE(betaX,alphaX,tempRx[0],tempRx[1],tempRx[2])
        g = uODE(betaY,alphaY,tempRy[0],tempRy[1],tempRy[2])
        
        # For u_n and v_n, integrate to find u_n+1 and v_n+1       
        u = RK4_3D(f,dt,u[0],u[1],u[2])
        v = RK4_3D(g,dt,v[0],v[1],v[2])

        # Transform u and v back to x and y to display
        tempx = dot(exp(-u),array([1,1,1]))
        tempy = dot(exp(-v),array([1,1,1]))
        x = exp(-u)/tempx
        y = exp(-v)/tempy

        # Append the points to the curves
        xtem = transX(x)
        XAgent[index].append(pos = xtem)
        YAgent[index].append(pos = y)
        Xcurrpos.pos = xtem
        Ycurrpos.pos = y

        CenterMe()          # Re-center the simulation with mouse
        c.interact()        # Check for mouse events

        # Update the Alpha and Beta values
        lAlphaX.text = "AlphaX = %3.3f" % sAlphaX.value
        alphaX = sAlphaX.value
        lBetaX.text = "BetaX = %3.3f" % sBetaX.value
        betaX = sBetaX.value

        lAlphaY.text = "AlphaY = %3.3f" % sAlphaY.value
        alphaY = sAlphaY.value
        lBetaY.text = "BetaY = %3.3f" % sBetaY.value
        betaY = sBetaY.value

        #curves start to look bad after 500 iterations so start
        #       new curves
        pointcount += 1
        if pointcount == 500:
            xtem = transX(x)
            XAgent.append(curve(pos = [(xtem[0],xtem[1],xtem[2])], color = (1,0,0), radius=0.01 ))
            YAgent.append(curve(pos = [(y[0],y[1],y[2])], color = (0,1,1), radius=0.01 ))
            pointcount = 0
            index += 1
            print "index = " + str(index)
            

        rate(rateVal)
    # End while

    # Check for mouse events again
    c.interact()
    lAlphaX.text = "AlphaX = %3.3f" % sAlphaX.value
    lBetaX.text = "BetaX = %3.3f" % sBetaX.value
    lAlphaY.text = "AlphaY = %3.3f" % sAlphaY.value
    lBetaY.text = "BetaY = %3.3f" % sBetaY.value
# End while