问题1: 一维随机游走
参考:https://github.com/hockyg/chem-ga-2600/blob/master/statistics/statistics-week1.ipynb
1
2
3
4
5
6
7
|
import numpy as np
import matplotlib.pyplot as plt
#独立的模拟数
number_of_independent_simulations = 10
#步数
number_of_steps_to_take = 10000
|
1
2
3
4
5
6
7
|
#此函数将会绘制给定轨迹的结果
def plot_trajectories(trajectory_list):
for i in range(len(trajectory_list)):
plt.plot(trajectory_list[i])
plt.axhline(0,linestyle='--',color='black',lw=2)
plt.ylabel("Position")
plt.xlabel("Time")
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
# 在此练习中我们需要随机数0-1转换为a -1 或者 a 1
def random_to_direction(random_number):
# 将0-0.5转换为-1,将0.5-1转换为1
# numpy.floor是一个数学函数,用于返回数组元素的底片。标量x的地板是最大的整数i,这样i<=x。
direction = 1-2*np.floor(random_number+0.5)
return direction
# 用列表存储轨迹
trajectory_list = []
# 首先让我们用缓慢的方法进行模拟
# 我们需要一个for循环来进行独立的模拟,一个for循环来进行移动
for i in range(number_of_independent_simulations):
#所有粒子开始于0.
#我们把所有粒子的i=0到最大T = number_of_steps_to_take ,即步长的位置先全部初始化为0,后续进行位置调整
position_array = np.zeros(number_of_steps_to_take+1)
for j in range(number_of_steps_to_take):
#使用np.random.random()生成0-1的数
change_in_position = random_to_direction(np.random.random())
position_array[j+1] = position_array[j]+change_in_position
trajectory_list.append(position_array)
#绘制轨迹
plot_trajectories(trajectory_list)
|

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
# 另外一个计算更快的调用numpy的版本
def run_simulations(number_of_independent_simulations,number_of_steps_to_take):
trajectory_list = []
for i in range(number_of_independent_simulations):
#生成具有长度的随机数
random_number_list = np.random.random(size=number_of_steps_to_take)
# 使用前面的函数一次性生成(-1,+1)
directions = random_to_direction(random_number_list)
#np.cumsum(a, axis = 0)用于将数组按行累加
position_array = np.cumsum(directions)
trajectory_list.append(position_array)
return trajectory_list
# 运行
trajectory_list = run_simulations(5,5000)
plot_trajectories(trajectory_list)
|

问题2: 需要多少轨迹才能看到扩散行为?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
number_of_independent_simulations = 200
trajectory_list = run_simulations(number_of_independent_simulations,1000)
# 将其放入数组,并且作为行,然后坐标作为列
trajectory_array = np.array(trajectory_list)
# 查看形状
print("array shape:",trajectory_array.shape)
# 这样我们可以使用numpy相关函数进行一维的计算
# 在固定时间对颗粒的距离进行方差
msd = np.var(trajectory_array,axis=0)
# 0到T的数字列表
times = np.arange(trajectory_array.shape[1])
plt.plot(times,msd,label="mean square distlacement")
plt.plot(times,times,label='linear in time')
plt.ylabel("Var(x)=$\\langle (x(t)-\mu)^2 \\rangle$")
plt.xlabel("$t$")
plt.legend()
|
array shape: (200, 1000)
<matplotlib.legend.Legend at 0x1badb2c8fa0>

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
# 现在,我们将看到平均最终位置的差异如何取决于独立模拟的数量
# 为了估计差异,我们将在第一组模拟上进行更多的设置
n_repeats_to_get_variance_estimate = 200
# 让我们使用numpy函数来生成2至256的独立样本数量列表
list_of_number_of_independent_samples = np.power(2, np.arange(1,9) )
print(list_of_number_of_independent_samples)
list_of_mean_position_vs_time = []
for i in range(n_repeats_to_get_variance_estimate):
avg_position_list = []
for number_of_independent_simulations in list_of_number_of_independent_samples:
trajectory_list = run_simulations(number_of_independent_simulations,1000)
# 使用numpy的mean在独立的模拟中进行平均求职
avg_positions = np.mean(np.array(trajectory_list), axis = 0 )
avg_position_list.append(avg_positions)
list_of_mean_position_vs_time.append(np.array(avg_position_list))
variance_array = np.var(np.array(list_of_mean_position_vs_time),axis=0)
# 绘图
plt.figure()
for t in [9,29,99,299,999]:
plt.plot(list_of_number_of_independent_samples,variance_array[:,t],label='time=%.f'%(t+1))
plt.legend(loc=0)
plt.xscale('log')
plt.yscale('log')
plt.xlabel("$N_{samples}$")
plt.ylabel("Var($\\mu_{N_samples}$)")
|
[ 2 4 8 16 32 64 128 256]
Text(0, 0.5, 'Var($\\mu_{N_samples}$)')

问题3: 中心极限定理
1
2
3
4
5
6
|
mean_positions_final_time = np.array(list_of_mean_position_vs_time)[:,:,-1]
for i in range(mean_positions_final_time.shape[1]):
num_samples = list_of_number_of_independent_samples[i]
hist,bins = np.histogram(mean_positions_final_time[:,i],bins=10,density=True)
plt.plot((bins[:-1]+bins[1:])/2.0,hist,label='samples=%i'%num_samples)
plt.legend(loc=0)
|
<matplotlib.legend.Legend at 0x1badbaa72b0>
