# Single Agent Dynamics for Rock-Paper-Scissors game
# For Rock-Paper-Scissors, there are only three actions
# Main equation is dotxi = xi*[beta*(Ri-R)+alpha*(Hi-H)]
#
# Ri is the reward for each action i, R is the average
# Hi is the self-information of each action i, H is the average
#
# Alpha controls memory where   alpha>0 -> memory loss
#                               alpha=0 -> perfect memory
from visual import *
from Numeric import *
from visual.controls import *

#-------------------------------------------------------------------------
print """
Right button drag to rotate camera to view scene.
Middle button drag up or down to zoom in or out.

Button Controls

Press the "Go/Stop" button to start simulation.
Press the button again to stop/start simulation.

The Alpha scale changes the memory loss rate.
The Beta scale changes the reaction to current rewards.
"""
#-------------------------------------------------------------------------
# Initialize the simulation

# constants
epsilon = 0.5   # reward perturbation; enters the payoff vector R below
beta = 0.1      # reaction rate to current rewards (slider-adjustable)
alpha = 0.0     # memory-loss rate: 0 = perfect memory (slider-adjustable)
dt = 0.01       # time step for the RK4 integrator

scene.title = "One agent playing Rock-Papers-Scissors"
scene.background = (1,1,1)     # Makes background white

# Draw the axes (green lines along x, y and z)
curve( pos = [ (-1,0,0), (1.5,0,0) ], color = (0,1,0), radius = 0.06 )
curve( pos = [ (0,-1,0), (0,1.5,0) ], color = (0,1,0), radius = 0.06 )
curve( pos = [ (0,0,-1), (0,0,1.5) ], color = (0,1,0), radius = 0.06 )

# Draw the state space: the edges of the probability simplex
# x1 + x2 + x3 = 1, joining (1,0,0), (0,1,0) and (0,0,1) (blue)
curve( pos = [ (1,0,0), (0,0,1) ], color = (0,0,1), radius = 0.03 )
curve( pos = [ (0,1,0), (0,0,1) ], color = (0,0,1), radius = 0.03 )
curve( pos = [ (1,0,0), (0,1,0) ], color = (0,0,1), radius = 0.03 )

# The curve of the agent's action probabilities (red trajectory)
OneAgent = curve( color = (1,0,0), radius=0.02 )

# Create controls window for buttons and sliders
c = controls()

# Create a button in the controls window; clicking it calls change(),
# which toggles the global `go` flag that drives the main loop below
bGo = button( pos=(0,0), width=60, height=20, text='Go', action=lambda: change() )

# Create sliders for constants alpha and beta, each with a text label
# that is refreshed inside the main loop
sAlpha = slider(pos=(-30,60), min=0.0, max=1.0, axis=(60,0))
sAlpha.value = alpha
lAlpha = label(pos=(0,75), text="Alpha = %2.2f" % sAlpha.value, opacity=0, box=0, line=0, display=c.display)
sBeta = slider(pos=(-30,30), min=0.0, max=1.0, axis=(60,0))
sBeta.value = beta
lBeta = label(pos=(0,45), text="Beta = %2.2f" % sBeta.value, opacity=0, box=0, line=0, display=c.display)

#-------------------------------------------------------------------------
# Functions used in the program

# Define a class for the ODE udot
# x,y,z are each components of u (x=u1,y=u2,z=u3)
# The equation is udot = -beta*(R-aveR) - alpha*u
class uODE:
    """Right-hand side of the learning dynamics in u-space.

    Every component obeys the same law:

        du_i/dt = -( beta * (R_i - aveR) + alpha * u_i )

    where beta scales the reaction to the reward advantage of action i
    and alpha damps u toward zero (memory loss).
    """
    def __init__(self,beta,alpha,R1,R2,R3,aveR):
        # Reaction rate, memory-loss rate, the three per-action rewards,
        # and the current average reward.
        self.beta, self.alpha = beta, alpha
        self.R1, self.R2, self.R3 = R1, R2, R3
        self.aveR = aveR
    def _rate(self, reward, u):
        # Common form shared by all three component derivatives.
        return -(self.beta * (reward - self.aveR) + self.alpha * u)
    def xdot(self,x,y,z):
        return self._rate(self.R1, x)
    def ydot(self,x,y,z):
        return self._rate(self.R2, y)
    def zdot(self,x,y,z):
        return self._rate(self.R3, z)

# Fourth order Runge-Kutta
# This version returns a vector with 3 components
def RK4_3D(f,dt,x,y,z):
    """Advance the 3-component state (x, y, z) by one classical
    fourth-order Runge-Kutta step of size dt.

    `f` supplies the derivatives through its xdot/ydot/zdot methods,
    each called as f.*dot(x, y, z).  Returns the new state as a
    3-element array.
    """
    derivs = (f.xdot, f.ydot, f.zdot)

    def stage(px, py, pz):
        # Evaluate all three scaled derivatives at one sample point.
        return [dt * d(px, py, pz) for d in derivs]

    k1 = stage(x, y, z)
    k2 = stage(x + k1[0]/2.0, y + k1[1]/2.0, z + k1[2]/2.0)
    k3 = stage(x + k2[0]/2.0, y + k2[1]/2.0, z + k2[2]/2.0)
    k4 = stage(x + k3[0], y + k3[1], z + k3[2])

    # Standard RK4 weighting: (k1 + 2*k2 + 2*k3 + k4) / 6 per component.
    return array([s + (a + 2.0*b + 2.0*c + d)/6.0
                  for s, a, b, c, d in zip((x, y, z), k1, k2, k3, k4)])

# Called by controls when button is clicked
# If go is false, then we change go to true to start integration
# Vice versa for when go is true
def change():
    """Toggle the simulation on/off.

    Invoked by the controls window when the Go/Stop button is clicked:
    flips the global `go` flag that gates the main loop and relabels
    the button to show the action a further click will perform.
    """
    global go
    # Map current state -> (new state, new button label) in one step.
    go, bGo.text = (false, "Go") if go == true else (true, "Stop")

# Function to transform x into u
def TransME(x):
    """Map a probability vector x to zero-mean surprise coordinates u.

    u_i = H_i - mean(H), where H_i = -log(x_i) is the self-information
    (surprise) of action i.  The components of u always sum to zero.

    Probabilities are clamped below at 1e-15 so the logarithm stays
    finite.  The clamp is applied to a fresh array, so -- unlike the
    previous in-place loop -- the caller's array is never mutated.
    Works for any vector length, not just 3.
    """
    # Clamp on a copy instead of overwriting the caller's entries.
    x = maximum(x, 1e-15)
    H = -log(x)                     # self-information of each action
    # Subtract the mean so the u components sum to zero.
    return H - sum(H) / len(H)
#-------------------------------------------------------------------------
# The initial rewards for each action (rock, paper, scissors).
# BUG FIX: the literals must be floats -- under Python 2, 2/3 and 1/3
# are integer division and evaluate to 0, which silently dropped every
# epsilon term and made R = [0, -1, 1] regardless of epsilon.
R = array([2.0/3.0*epsilon, -1 - 1.0/3.0*epsilon, 1 - 1.0/3.0*epsilon])

# Initialize the probabilities, their sum should equal 1
x = array([0.6,0.2,0.2])
OneAgent.append(pos=x)          # seed the red trajectory at the start point
# initpos is a sphere indicating the initial position
# currpos is a sphere indicating the current position
initpos = sphere(pos=OneAgent.pos[0], color = (0,0,1), radius=0.03)
currpos = sphere(pos=OneAgent.pos[0], color = (1,0,0), radius=0.04)

### A ball representing the Nash equilibrium
### (NOTE(review): if re-enabled, 1/3 is integer division under
###  Python 2 and yields 0 -- would need 1.0/3.0)
##ball = sphere(pos = (1/3,1/3,1/3), color = (1,0,1), radius = 0.03 )

# Transform x into u; the integration below runs in u-space and x is
# recovered each step from u for display
u = TransME(x)

go = false      # Controls the button functions

# Main loop.  The outer `while true` runs for the life of the program;
# the inner `while (go)` advances the dynamics only while the Go/Stop
# button has set `go`.  Each inner pass: compute the average reward,
# take one RK4 step in u-space, map u back to probabilities x, extend
# the red trajectory, and poll the controls window for slider/button
# events.  While paused, the outer loop still services the GUI.
while true:
	while (go):
	    # Find the average of the rewards and self-information
	    # by using the dot product
		aveR = dot(R,x)

		# Make ODE for udot
		f = uODE(beta,alpha,R[0],R[1],R[2],aveR)

		# Integrate
		u = RK4_3D(f,dt,u[0],u[1],u[2])

		# Transform ui's into xi's to display
		# (x_i = exp(-u_i) / sum_j exp(-u_j), the inverse of TransME)
		temp = dot(exp(-u),array([1,1,1]))      # sum of the exp(ui)'s
		x = exp(-u)/temp

		OneAgent.append(pos = x)
		currpos.pos = x

		# Check for mouse events and update alpha and beta values
		c.interact() 
		lAlpha.text = "Alpha = %2.2f" % sAlpha.value
		alpha = sAlpha.value
		lBeta.text = "Beta = %2.2f" % sBeta.value
		beta = sBeta.value

##		# Check for mouse events for new initial conditions
##		if scene.mouse.clicked:
##                    m = scene.mouse.getclick()
##                    loc = m.pos
##                    u = TransME(loc)
##                    OneAgent = curve(pos = loc, color = (1,0,0), radius = 0.02)
##                    OneAgent.append(loc)
##                    initpos = sphere(pos = loc, color = (0,0,1), radius = 0.03)
##                    currpos = sphere(pos = loc, color = (1,0,0), radius = 0.04)

		rate(100)	# cap the simulation at roughly 100 steps per second
        # End while
	c.interact() # Check for mouse events and drive specified actions
	# Update alpha and beta values (labels only; integration is paused)
	lAlpha.text = "Alpha = %2.2f" % sAlpha.value
	lBeta.text = "Beta = %2.2f" % sBeta.value
# end while

# NOTE(review): everything below is dead code -- the `while true:` loop
# above never exits, so execution never reaches this point.
# OneAgent.visible = 0
# pygame.time.wait(5000)
# del OneAgent
# OneAgent.visible = 1


#    rate(50)

print x
