深度学习


迷宫问题（Maze Problem）

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Draw the 3x3 maze: red wall segments, one text label per cell, and a green
# marker for the agent. `fig` and `line` are kept at module level because the
# animation code further down updates `line` and redraws `fig`.
fig = plt.figure(figsize=(5, 5))
ax = plt.gca()

# Each wall is a red segment given as ([x0, x1], [y0, y1]).
for wall_x, wall_y in (
    ([1, 1], [0, 1]),  # between s6 and s7
    ([1, 2], [2, 2]),  # between s1 and s4
    ([2, 2], [2, 1]),  # between s4 and s5
    ([2, 3], [1, 1]),  # between s5 and s8
):
    plt.plot(wall_x, wall_y, color='red', linewidth=2)

# Label the cells s0..s8 row by row, starting from the top-left corner.
for state_idx in range(9):
    plt.text(state_idx % 3 + 0.5, 2.5 - state_idx // 3, 's' + str(state_idx),
             size=14, ha='center')
plt.text(0.5, 2.3, 'Start', ha='center')
plt.text(2.5, 0.3, 'goal', ha='center')

ax.set_xlim(0, 3)
ax.set_ylim(0, 3)
# Hide all tick marks and tick labels. These options take booleans; the legacy
# string 'off' is a non-empty (truthy) value, so current Matplotlib releases
# would leave the ticks and labels visible.
plt.tick_params(axis='both', which='both', bottom=False, top=False,
                labelbottom=False, right=False, left=False, labelleft=False)

# Green circle marking the agent, initially at the centre of cell s0.
line, = ax.plot([0.5], [2.5], marker="o", color="g", markersize=60)

png

# Initial policy parameters: one row per state s0..s7 (the goal s8 has no row),
# one column per direction in the order up, right, down, left.
# 1 marks a direction the agent may move in; NaN marks a wall or the maze edge.
theta_0 = np.array(
    [
        [np.nan, 1, 1, np.nan],            # s0
        [np.nan, 1, np.nan, 1],            # s1
        [np.nan, np.nan, 1, 1],            # s2
        [1, 1, 1, np.nan],                 # s3
        [np.nan, np.nan, 1, 1],            # s4
        [1, np.nan, np.nan, np.nan],       # s5
        [1, np.nan, np.nan, np.nan],       # s6
        [1, 1, np.nan, np.nan],            # s7
    ]
)
def simple_convert_into_pi_from_theta(theta):
    """Turn the parameter matrix `theta` into a policy matrix.

    Each row of `theta` is divided by the sum of its non-NaN entries, so the
    available entries of a row become probabilities. NaN entries are reported
    as 0 in the result.

    Args:
        theta: 2-D array; NaN marks an unavailable entry.

    Returns:
        Array of the same shape holding the row-normalised values, with NaN
        replaced by 0.
    """
    n_rows, n_cols = theta.shape
    policy = np.zeros((n_rows, n_cols))
    for row_idx, row in enumerate(theta):
        # nansum ignores the NaN entries when computing the row total.
        policy[row_idx] = row / np.nansum(row)
    return np.nan_to_num(policy)
# Initial policy derived from theta_0: within each row, every direction marked
# 1 receives the same probability and every NaN direction receives 0.
pi_0 = simple_convert_into_pi_from_theta(theta_0)
# Bare expression so the notebook cell shows the resulting matrix.
pi_0
[[0.         0.5        0.5        0.        ]
 [0.         0.5        0.         0.5       ]
 [0.         0.         0.5        0.5       ]
 [0.33333333 0.33333333 0.33333333 0.        ]
 [0.         0.         0.5        0.5       ]
 [1.         0.         0.         0.        ]
 [1.         0.         0.         0.        ]
 [0.5        0.5        0.         0.        ]]
def get_next_s(pi,s):
    """Sample one move from state `s` and return the resulting state index.

    A direction is drawn from up/right/down/left using row `s` of `pi` as the
    probabilities. States are numbered row by row on a 3-wide grid, so moving
    up or down changes the index by 3 and moving left or right changes it by 1.

    Args:
        pi: policy matrix; row `s` holds the probabilities of the four
            directions in the order up, right, down, left.
        s: index of the current state.

    Returns:
        Index of the state reached after the sampled move.
    """
    direction = ["up", "right", "down", "left"]
    # Index change caused by each direction on the 3-column grid.
    offsets = {"up": -3, "right": 1, "down": 3, "left": -1}
    chosen = np.random.choice(direction, p=pi[s, :])
    return s + offsets[str(chosen)]
def goal_maze(pi):
    """Walk through the maze under policy `pi` until the goal is reached.

    The walk starts in state 0 and repeatedly samples the next state with
    `get_next_s` until state 8 (the goal) is reached.

    Args:
        pi: policy matrix passed through to `get_next_s`.

    Returns:
        List of visited states, beginning with 0 and ending with 8.
    """
    state_history = [0]
    current = 0
    while True:
        current = get_next_s(pi, current)
        state_history.append(current)
        if current == 8:
            return state_history
# Run one episode under the initial policy and print the visited states.
state_history = goal_maze(pi_0)
print(state_history)
# The history includes the start state, so the number of moves is len - 1.
print("迷路问题解决step:" + str(len(state_history)-1))
[0, 3, 0, 1, 2, 1, 2, 5, 2, 1, 0, 3, 4, 7, 8]
迷路问题解决step:14
from matplotlib import animation
from IPython.display import HTML  
# Note: the module name `IPython` is case-sensitive; the leading "IP" is uppercase.
def init():
    """Reset the agent marker to an empty line for the animation's first draw.

    Returns:
        One-element tuple containing the updated `line` artist.
    """
    empty_x, empty_y = [], []
    line.set_data(empty_x, empty_y)
    return (line,)
def animate(i):
    """Move the agent marker to the cell visited at step `i`.

    Args:
        i: frame number, used as an index into `state_history`.

    Returns:
        One-element tuple containing the updated `line` artist.
    """
    state = state_history[i]
    # States are numbered row by row on the 3x3 grid: the column gives x and
    # the row (counted from the top) gives y, both at the cell centre.
    x = (state % 3) + 0.5
    y = 2.5 - (state // 3)
    # set_data expects sequences; recent Matplotlib releases reject bare
    # scalars, so the single point is wrapped in one-element lists.
    line.set_data([x], [y])
    return (line,)
# Build the animation on `fig`: one frame per entry of state_history,
# 200 ms between frames, played once (no repeat).
anim = animation.FuncAnimation(fig,animate,init_func = init,frames = len(state_history),interval = 200,repeat = False)
# Convert the animation to its JavaScript/HTML representation and display it.
HTML(anim.to_jshtml())

gif


评论
  目录