15 15 # number of jobs, number of machines
6 94 12 66 4 10 7 53 3 26 2 15 10 65 11 82 8 10 14 27 9 93 13 92 5 96 0 70 1 83 # each pair is (machine index, processing time on that machine)
4 74 5 31 7 88 14 51 13 57 8 78 11 8 9 7 6 91 10 79 0 18 3 51 12 18 1 99 2 33
1 4 8 82 9 40 12 86 6 50 11 54 13 21 5 6 0 54 2 68 7 82 10 20 4 39 3 35 14 68
5 73 2 23 9 30 6 30 10 53 0 94 13 58 4 93 7 32 14 91 11 30 8 56 12 27 1 92 3 9
7 78 8 23 6 21 10 60 4 36 9 29 2 95 14 99 12 79 5 76 1 93 13 42 11 52 0 42 3 96
5 29 3 61 12 88 13 70 11 16 4 31 14 65 7 83 2 78 1 26 10 50 0 87 9 62 6 14 8 30
12 18 3 75 7 20 8 4 14 91 6 68 1 19 11 54 4 85 5 73 2 43 10 24 0 37 13 87 9 66
11 32 5 52 0 9 7 49 12 61 13 35 14 99 1 62 2 6 8 62 4 7 3 80 9 3 6 57 10 7
10 85 11 30 6 96 14 91 0 13 1 87 2 82 5 83 12 78 4 56 8 85 7 8 9 66 13 88 3 15
6 5 11 59 9 30 2 60 8 41 0 17 13 66 3 89 10 78 7 88 1 69 12 45 14 82 4 6 5 13
4 90 7 27 13 1 0 8 5 91 12 80 6 89 8 49 14 32 10 28 3 90 1 93 11 6 9 35 2 73
2 47 14 43 0 75 12 8 6 51 10 3 7 84 5 34 8 28 9 60 13 69 1 45 3 67 11 58 4 87
5 65 8 62 10 97 2 20 3 31 6 33 9 33 0 77 13 50 4 80 1 48 11 90 12 75 7 96 14 44
8 28 14 21 4 51 13 75 5 17 6 89 9 59 1 56 12 63 7 18 11 17 10 30 3 16 2 7 0 35
10 57 8 16 12 42 6 34 4 37 1 26 13 68 14 73 11 5 0 8 7 12 3 87 2 83 9 20 5 97
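To make the instance format concrete, here is a minimal parsing sketch (the file name 'jobshop15x15.txt' is hypothetical, assuming the block above is saved verbatim): it reads the header and turns each job line into a list of (machine, processing time) pairs.

def read_jobshop_instance(path='jobshop15x15.txt'):
    """Parse a 'jobs machines' header plus one line of (machine, time) pairs per job."""
    with open(path) as f:
        header = f.readline().split('#')[0].split()
        n_jobs, n_machines = int(header[0]), int(header[1])
        jobs = []
        for _ in range(n_jobs):
            nums = [int(tok) for tok in f.readline().split('#')[0].split()]
            jobs.append(list(zip(nums[0::2], nums[1::2])))  # (machine index, processing time)
    return n_jobs, n_machines, jobs

# n_jobs, n_machines, jobs = read_jobshop_instance()
# jobs[0][0] would then be (6, 94): job 0 starts on machine 6 for 94 time units.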
import pandas as pd
import numpy as np
from scipy.io import loadmat  # e.g. AAS011R06.mat
m = loadmat('Data/P300/v2/AA001.mat')
m
{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN, Created on: Thu Nov 29 14:36:17 2001',
'__version__': '1.0',
'__globals__': [],
'run': array([[3],
[3],
[3],
...,
[8],
[8],
[8]], dtype=uint8),
'trial': array([[ 0],
[ 0],
[ 0],
...,
[192],
[192],
[192]], dtype=uint8),
'sample': array([[ 0],
[ 1],
[ 2],
...,
[28829],
[28830],
[28831]], dtype=uint16),
'signal': array([[-1136, -416, -592, ..., -816, -496, -624],
[-1456, -912, -752, ..., -48, 336, -48],
[-1888, -912, -480, ..., -240, 0, 64],
...,
[-1952, -2416, -2336, ..., -1376, -2096, -1168],
[-2784, -2912, -2912, ..., 144, -800, -48],
[-1872, -1168, -1264, ..., 1008, 304, 816]], dtype=int16),
'TargetCode': array([[0],
[0],
[0],
...,
[0],
[0],
[0]], dtype=uint8),
'ResultCode': array([[0],
[0],
[0],
...,
[0],
[0],
[0]], dtype=uint8),
'StimulusTime': array([[51992],
[51992],
[51992],
...,
[54165],
[54165],
[54165]], dtype=uint16),
'Feedback': array([[0],
[0],
[0],
...,
[0],
[0],
[0]], dtype=uint8),
'IntertrialInterval': array([[1],
[1],
[1],
...,
[1],
[1],
[1]], dtype=uint8),
'Active': array([[1],
[1],
[1],
...,
[1],
[1],
[1]], dtype=uint8),
'SourceTime': array([[52082],
[52082],
[52082],
...,
[54256],
[54256],
[54256]], dtype=uint16),
'RunActive': array([[1],
[1],
[1],
...,
[1],
[1],
[1]], dtype=uint8),
'Recording': array([[1],
[1],
[1],
...,
[1],
[1],
[1]], dtype=uint8),
'IntCompute': array([[0],
[0],
[0],
...,
[0],
[0],
[0]], dtype=uint8),
'Running': array([[1],
[1],
[1],
...,
[1],
[1],
 [1]], dtype=uint8)}

for i in m:
try:
print(i,m[i].shape)
except:
continue
# Field meanings: run number (runnr), intensification count within the run (trinr), and sample index within the run (sample).
# The remaining fields are unreadable on the garbled web page (or simply not documented there), so the MATLAB file itself has to be inspected.

run (172992, 1)
trial (172992, 1)
sample (172992, 1)
signal (172992, 64)
TargetCode (172992, 1)
ResultCode (172992, 1)
StimulusTime (172992, 1)
Feedback (172992, 1)
IntertrialInterval (172992, 1)
Active (172992, 1)
SourceTime (172992, 1)
RunActive (172992, 1)
Recording (172992, 1)
IntCompute (172992, 1)
Running (172992, 1)
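As a quick sanity check on these fields, the sketch below groups the continuous signal rows by their (run, trial) pair; this is an assumption about how the counters are meant to be used, not something documented in the file.

import numpy as np
from scipy.io import loadmat

m = loadmat('Data/P300/v2/AA001.mat')
run, trial = m['run'].ravel(), m['trial'].ravel()
signal = m['signal']                          # (n_samples, 64 EEG channels)

epochs = {}
for key in set(zip(run.tolist(), trial.tolist())):
    mask = (run == key[0]) & (trial == key[1])
    epochs[key] = signal[mask]                # all samples recorded during this run/trial
print(len(epochs), next(iter(epochs.values())).shape)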
import torch
import torchvision                            # common datasets package
import torchvision.transforms as transforms  # data transforms / normalization

# Build a transform pipeline: convert to a tensor, then normalize with
# (per-channel means, per-channel standard deviations)
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])  # e.g. (0 - 0.5) / 0.5 = -1
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True,
transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True,
num_workers=2)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True,
transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False,
num_workers=2)
# Choose num_workers according to the machine's CPU and memory; with plenty of both it can be set higher.
# Setting it to 0 means the data is loaded in the main process (no worker subprocesses).
classes = ('plane','car', 'bird','cat','deer', 'dog','frog','horse','ship','truck')
Files already downloaded and verified
Files already downloaded and verified

import matplotlib.pyplot as plt
import numpy as np
# helper to display an image grid
def imshow(img):
    img = img / 2 + 0.5     # undo the normalization
npimg = img.numpy()
plt.imshow(np.transpose(npimg, (1, 2, 0)))
plt.show()
# grab a few training images
dataiter = iter(trainloader)
images, labels = dataiter.next()
print(images.size())
imshow(torchvision.utils.make_grid(images))
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
torch.Size([4, 3, 32, 32])
  car truck   cat  ship

import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = x.view(-1, 16 * 5 * 5)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
net = Net()
print(net)
Net(
(conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
(pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
(fc1): Linear(in_features=400, out_features=120, bias=True)
(fc2): Linear(in_features=120, out_features=84, bias=True)
(fc3): Linear(in_features=84, out_features=10, bias=True)
)

import torch.optim as optim
criterion = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)  # SGD with momentum

for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):  # iterate, starting the index at 0
        inputs, labels = data                  # unpack the batch
        optimizer.zero_grad()                  # reset the gradients
        outputs = net(inputs)                  # forward pass
        loss = criterion(outputs, labels)      # compute the cross-entropy loss
        loss.backward()                        # backward pass
        optimizer.step()                       # update the parameters
        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')
[1, 2000] loss: 2.193
[1, 4000] loss: 1.864
[1, 6000] loss: 1.660
[1, 8000] loss: 1.559
[1, 10000] loss: 1.514
[1, 12000] loss: 1.468
[2, 2000] loss: 1.384
[2, 4000] loss: 1.356
[2, 6000] loss: 1.325
[2, 8000] loss: 1.317
[2, 10000] loss: 1.295
[2, 12000] loss: 1.271
Finished Training

PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)  # save the state dict to the given file

dataiter = iter(testloader)
images, labels = dataiter.next()
imshow(torchvision.utils.make_grid(images))
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
  cat ship ship plane

net = Net()
net.load_state_dict(torch.load(PATH))  # load the file, then load the weights into net (optional here)
<All keys matched successfully>

outputs = net(images)                  # run the trained network to get predictions
_, predicted = torch.max(outputs, 1)   # torch.max(data, dim) returns (values, indices) of the maxima along dim
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]
                              for j in range(4)))
Predicted:  cat car ship plane

correct = 0
total = 0
with torch.no_grad():
for data in testloader:
images, labels = data
outputs = net(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
Accuracy of the network on the 10000 test images: 54 %

class_correct = list(0. for i in range(10))  # ten 0.0 values
class_total = list(0. for i in range(10))
with torch.no_grad():
for data in testloader:
images,labels = data
outputs = net(images)
_, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()  # squeeze out the extra dimension
        for i in range(4):                   # batch size is 4
label = labels[i]
class_correct[label] += c[i].item()
class_total[label] += 1
for i in range(10):
print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))
Accuracy of plane : 57 %
Accuracy of car : 52 %
Accuracy of bird : 41 %
Accuracy of cat : 44 %
Accuracy of deer : 27 %
Accuracy of dog : 40 %
Accuracy of frog : 69 %
Accuracy of horse : 56 %
Accuracy of ship : 77 %
Accuracy of truck : 75 %

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
cuda:0

net.to(device)  # recursively converts all modules and parameters to CUDA tensors
inputs, labels = data[0].to(device), data[1].to(device)  # every batch must be moved to the GPU as well
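A minimal end-to-end sketch of a GPU training step, assuming the net, trainloader, criterion and optimizer defined earlier; the point is simply that the model and every batch must live on the same device.

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)                                              # move all parameters to the device
for inputs, labels in trainloader:
    inputs, labels = inputs.to(device), labels.to(device)   # move the batch too
    optimizer.zero_grad()
    loss = criterion(net(inputs), labels)
    loss.backward()
    optimizer.step()
    break                                                   # one step is enough for the illustration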
import torch
torch.cuda.is_available()
True

# An uninitialized matrix: its values are whatever happened to be in memory
x = torch.empty(5,3)
print(x)
tensor([[1.6880e+25, 2.5226e-18, 6.6645e-10],
        [4.1575e+21, 1.3294e-08, 2.0773e+20],
        [1.6536e-04, 1.0016e-11, 8.3391e-10],
        [2.1029e+20, 2.0314e+20, 3.1369e+27],
        [7.0800e+31, 3.1095e-18, 1.8590e+34]])

# Construct a randomly initialized matrix
x = torch.rand(5,3)
x
tensor([[0.3129, 0.8714, 0.8079],
        [0.4722, 0.3522, 0.3068],
        [0.9920, 0.0171, 0.6463],
        [0.1151, 0.7443, 0.1300],
        [0.2816, 0.7904, 0.1833]])

# Construct a matrix filled with zeros, with dtype long
x = torch.zeros(5, 3, dtype=torch.long)
xtensor([[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]])# 将数组转换从tensor张量
x = torch.tensor([5.5, 3])
xtensor([5.5000, 3.0000])# 拷贝一个值
x = x.new_ones(5, 3, dtype=torch.double) # new_* methods take in sizes
print(x)tensor([[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)

# Copy the shape and fill with values drawn from a standard normal distribution
x = torch.randn_like(x, dtype=torch.float)    # override dtype!
print(x)                                      # result has the same size
tensor([[0.1310, 0.8429, 0.9671],
[0.4961, 0.4118, 0.5708],
[0.9019, 0.1656, 0.9630],
[0.1138, 0.5194, 0.2060],
[0.0544, 0.8853, 0.4521]])# 拷贝形状,并随机赋值0到1之间
x = torch.rand_like(x, dtype=torch.float) # override dtype!
print(x) # result has the same sizetensor([[0.2949, 0.5003, 0.8243],
[0.0197, 0.8175, 0.7986],
[0.2791, 0.1747, 0.9388],
[0.6410, 0.7757, 0.9517],
[0.5885, 0.8757, 0.0301]])# 获取矩阵大小
print(x.size())torch.Size([5, 3])# 加法一,会生成新变量
y = torch.rand(5, 3)
print(x + y)
# 加法二,会生成新变量
print(torch.add(x, y))tensor([[0.8205, 1.1127, 0.9667],
[1.0123, 1.6212, 1.7500],
[0.7720, 0.5131, 1.8308],
[1.1216, 1.0681, 1.8199],
[1.4038, 1.3346, 0.7781]])
tensor([[0.8205, 1.1127, 0.9667],
[1.0123, 1.6212, 1.7500],
[0.7720, 0.5131, 1.8308],
[1.1216, 1.0681, 1.8199],
[1.4038, 1.3346, 0.7781]])# 加法三,设定指定的变量result为输出
result = torch.empty(5, 3)
torch.add(x, y, out=result)
print(result)tensor([[0.8205, 1.1127, 0.9667],
[1.0123, 1.6212, 1.7500],
[0.7720, 0.5131, 1.8308],
[1.1216, 1.0681, 1.8199],
[1.4038, 1.3346, 0.7781]])# 加法四:就地加法,不会生成新变量,但会改变其中一个参数
y.add_(x)
print(y)tensor([[0.8205, 1.1127, 0.9667],
[1.0123, 1.6212, 1.7500],
[0.7720, 0.5131, 1.8308],
[1.1216, 1.0681, 1.8199],
[1.4038, 1.3346, 0.7781]])# 转置矩阵
y.t_()tensor([[0.8205, 1.0123, 0.7720, 1.1216, 1.4038],
[1.1127, 1.6212, 0.5131, 1.0681, 1.3346],
[0.9667, 1.7500, 1.8308, 1.8199, 0.7781]])# 拷贝
x.copy_(y.t_())
# 任何使得张量发生变化的操作都需要添加下划线_tensor([[0.8205, 1.1127, 0.9667],
[1.0123, 1.6212, 1.7500],
[0.7720, 0.5131, 1.8308],
[1.1216, 1.0681, 1.8199],
[1.4038, 1.3346, 0.7781]])# 输出所有列,第1行
print(x[:, 1])tensor([1.1127, 1.6212, 0.5131, 1.0681, 1.3346])# 通过view改变形状
x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8) # -1表示根据其他维度来推断
print(x.size(), y.size(), z.size())torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])# 张量转化为数字
x = torch.randn(1)
print(x)
print(x.item())tensor([1.2919])
1.291871190071106# torch转NumPy
# 两个变量共享内存
a = torch.ones(5)
print(a)
b = a.numpy()
print(b)
a.add_(1)
print(a)
print(b)tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]# NumPy转torch
# 自动转化
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)
print(a)
print(b)
# CharTensor不支持转化为NumPy[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)# 以下代码只有在PyTorch GPU版本上才会执行
if torch.cuda.is_available():
device = torch.device("cuda") # GPU
y = torch.ones_like(x, device=device) # 直接创建一个在GPU上的Tensor
x = x.to(device) # 改变环境,等价于 .to("cuda")
z = x + y
print(z)
print(z.to("cpu", torch.double)) # to()还可以同时更改数据类型tensor([1.1416, 2.4353], device='cuda:0')
tensor([1.1416, 2.4353], dtype=torch.float64)print(y)tensor([1., 1.], device='cuda:0')torch.empty(5, 3)
torch.rand(5, 3)
torch.zeros(5, 3, dtype=torch.long)
torch.tensor([5.5, 3]) # 根据数组创建
# 获取形状
print(x.size())
print(x.shape)x + y
torch.add(x, y)
y.add_(x) # 就地加法,类似x.copy_(y), x.t_()# 不同的size,但是是共享data
y = x.view(15)
z = x.view(-1, 5) # -1所指的维度可以根据其他维度的值推出来
print(x.size(), y.size(), z.size())
# torch.Size([5, 3]) torch.Size([15]) torch.Size([3, 5])
# 不同size,不同data的新副本
x_cp = x.clone().view(15) # clone会被记录在计算图中,即梯度传播影响源数据
# reshape() 不保证返回的一定是拷贝后的数据x = torch.randn(1)
print(x)
print(x.item())
# tensor([2.3466])
y[:] = y + x  # in place
y = y + x     # not in place
# view only shares the tensor's data; the two objects' id() values (their addresses as Python objects)
# differ, because a tensor holds more than just its data.
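A quick check of that claim (sketch): a view shares storage with its source, so writing through the view changes the original even though the two Python objects are distinct.

x = torch.zeros(2, 3)
y = x.view(6)
y[0] = 42
print(x[0, 0])          # tensor(42.) -- the underlying data is shared
print(id(x) == id(y))   # False -- but they are different tensor objects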
# tensor to numpy
a = torch.ones(5)
b = a.numpy()

# numpy to tensor
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
# torch.tensor() converts a NumPy array into a Tensor
# and always copies the data
c = torch.tensor(a)

import torch
# The torch.nn package only supports mini-batches, not single samples (use input.unsqueeze(0) to fake a batch of size 1)
import torch.nn as nn
import torch.nn.functional as F # 一些常用的函数
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 3)   # convolution layer
        self.conv2 = nn.Conv2d(6, 16, 3)  # convolution layer
        # Linear is an affine map; the first argument is the number of input features, the second the number of outputs
        self.fc1 = nn.Linear(16 * 6 * 6, 120)  # 6*6 is the height and width of the feature map at this point
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10) # 最后输出10个分类
def forward(self, x):
x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2)) # 最大二维池化层:2*2格子内取最大
x = F.max_pool2d(F.relu(self.conv2(x)), 2) # 第一个参数的结果是笔直的向量
x = x.view(-1, self.num_flat_features(x)) # -1表示自动计算,view是改变形状
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
def num_flat_features(self, x): # 每个输入变平后的数量大小
size = x.size()[1:] # 获得除了批量大小的所有维度
num_features = 1
for s in size:
num_features *= s
return num_features
net = Net()
print(net)  # show the model's structure
Net(
(conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
(conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
(fc1): Linear(in_features=576, out_features=120, bias=True)
(fc2): Linear(in_features=120, out_features=84, bias=True)
(fc3): Linear(in_features=84, out_features=10, bias=True)
)

params = list(net.parameters())
print(len(params))        # number of learnable parameter tensors
print(params[0].size())   # 6*1*3*3, conv1's weights
10
torch.Size([6, 1, 3, 3])

print(params[1])          # conv1's bias
Parameter containing:
tensor([ 0.0176, 0.2514, 0.1497, 0.1647, -0.2829, 0.0409],
        requires_grad=True)

input = torch.randn(1,1,32,32)
out = net(input)
print(out)  # the largest value marks the predicted class; before any training this is essentially random
tensor([[ 6.9398e-03, -6.6113e-05, -6.9214e-02, -7.6395e-02,  3.9166e-02,
-7.0978e-02, 1.2993e-02, -1.7690e-02, -1.4292e-02, -4.7204e-02]],
        grad_fn=<AddmmBackward>)

net.zero_grad()                   # zero all gradients
out.backward(torch.randn(1,10))   # back-propagate a random gradient

output = net(input)
target = torch.randn(10)          # a made-up target
target = target.view(1, -1)       # reshape to [1, 10]
criterion = nn.MSELoss()          # mean squared error
loss = criterion(output, target)
print(loss)
tensor(0.7828, grad_fn=<MseLossBackward>)

"""
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
-> view -> linear -> relu -> linear -> relu(3) -> linear(2)
-> MSELoss(1)
-> loss
"""
print(loss.grad_fn) # MSELoss
print(loss.grad_fn.next_functions[0][0]) # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU<MseLossBackward object at 0x7fefaac1c8d0>
<AddmmBackward object at 0x7fefaac1ca90>
<AccumulateGrad object at 0x7fefaac1c8d0>net.zero_grad()
print('conv1.bias.grad before backward') # 反向传播之前
print(net.conv1.bias.grad)
loss.backward()
print('conv1.bias.grad after backward') # 反向传播后
print(net.conv1.bias.grad)conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-0.0158, -0.0015, -0.0017, 0.0073, 0.0074, -0.0123])# 更新权重
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)  # sub_ is in-place subtraction

import torch.optim as optim                       # the optimization-algorithms package
optimizer = optim.SGD(net.parameters(), lr=0.01)  # create the optimizer
optimizer.zero_grad()                 # reset the gradients to zero
output = net(input)                   # forward pass
loss = criterion(output, target)      # compute the loss
loss.backward()                       # backward pass
optimizer.step()                      # update the weights

import torch
# Create a tensor that tracks gradients
x = torch.ones(2, 2, requires_grad=True)
print(x)tensor([[1., 1.],
[1., 1.]], requires_grad=True)y = x + 2
ytensor([[3., 3.],
[3., 3.]], grad_fn=<AddBackward0>)help(y.grad_fn)Help on AddBackward0 object:
class AddBackward0(object)
| Methods defined here:
|
| __call__(self, /, *args, **kwargs)
| Call self as a function.
|
| name(...)
|
| register_hook(...)
|
| ----------------------------------------------------------------------
| Data descriptors defined here:
|
| metadata
|
| next_functions
|
| requires_gradz = y * y *3 # z = (x+2)^2 *3
out = z.mean() # out = (x+2)^2 *3/4
z, out(tensor([[27., 27.],
[27., 27.]], grad_fn=<MulBackward0>),
tensor(27., grad_fn=<MeanBackward0>))a = torch.randn(2, 2)
a = ((a*3)/ (a-1))
# requires_grad defaults to False
print(a.requires_grad)
# turn gradient tracking on
a.requires_grad_(True)
print(a.requires_grad)
b = (a*a).sum()
print(b.grad_fn)False
True
<SumBackward0 object at 0x7fd230a89d90>

# Compute the partial derivative of out with respect to x, evaluated at x = 1
out.backward()
x.grad
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])

x = torch.randn(3 , requires_grad=True)
y = x *2
# norm是求L2范数,平方之和而后开方
while y.data.norm() < 1000:
y = y *2
print(y)tensor([ 277.2757, 3.3198, -986.0908], grad_fn=<MulBackward0>)v = torch.tensor([0.1, 1.0, 1.0], dtype=torch.float)
# pass the vector v to backward
y.backward(v)
# this computes the vector-Jacobian product, i.e. the gradient of y·v with respect to x
x.grad
# the values below are 0.1, 1.0, 1.0 scaled by 2^11
tensor([ 204.8000, 2048.0000, 2048.0000])

print(x.requires_grad)
print((x ** 2).requires_grad)
# 取消求梯度
with torch.no_grad():
print((x ** 2).requires_grad)True
True
Falseprint(x.requires_grad)
# 取消求梯度
y = x.detach()
print(y.requires_grad)
print(x.eq(y))True
False
tensor([True, True, True])
from mxnet import autograd, nd
x = nd.arange(4).reshape((4, 1))
# allocate memory for the gradient
x.attach_grad()
# record operations for autograd; this switches from prediction mode to training mode
with autograd.record():
y = 2 * nd.dot(x.T, x)
# Compute the gradient; if y is not a scalar, its elements are summed first
y.backward()

Exercise note: look up which loss functions and initialization methods the gluon.loss and init modules provide.
———————————————————— two hidden layers, with dropout ————————————————————
epoch 1, loss 1.1832, train acc 0.534, test acc 0.773
epoch 2, loss 0.6075, train acc 0.772, test acc 0.826
epoch 3, loss 0.5131, train acc 0.809, test acc 0.844
epoch 4, loss 0.4745, train acc 0.826, test acc 0.856
epoch 5, loss 0.4468, train acc 0.836, test acc 0.855
———————————————————— two hidden layers, no dropout ————————————————————
epoch 1, loss 1.0908, train acc 0.573, test acc 0.786
epoch 2, loss 0.5375, train acc 0.797, test acc 0.838
epoch 3, loss 0.4577, train acc 0.831, test acc 0.851
epoch 4, loss 0.4101, train acc 0.846, test acc 0.861
epoch 5, loss 0.3852, train acc 0.855, test acc 0.862
———————————————————— four hidden layers, with dropout ————————————————————
epoch 1, loss 2.3031, train acc 0.098, test acc 0.100
epoch 2, loss 2.3014, train acc 0.106, test acc 0.200
epoch 3, loss 1.7850, train acc 0.259, test acc 0.384
epoch 4, loss 1.2700, train acc 0.476, test acc 0.663
epoch 5, loss 0.8095, train acc 0.673, test acc 0.744
———————————————————— four hidden layers, no dropout ————————————————————
epoch 1, loss 2.3029, train acc 0.102, test acc 0.100
epoch 2, loss 2.2843, train acc 0.114, test acc 0.202
epoch 3, loss 1.5485, train acc 0.348, test acc 0.496
epoch 4, loss 0.9888, train acc 0.583, test acc 0.707
epoch 5, loss 0.6727, train acc 0.737, test acc 0.813
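For reference, a minimal Gluon sketch of the two-hidden-layer network with dropout used in the comparison above; the dropout rates 0.2/0.5 and the learning rate are the book's usual choices, assumed here rather than copied from the run.

from mxnet import gluon, init
from mxnet.gluon import nn, loss as gloss

drop_prob1, drop_prob2 = 0.2, 0.5
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'), nn.Dropout(drop_prob1),
        nn.Dense(256, activation='relu'), nn.Dropout(drop_prob2),
        nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})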
%matplotlib inline  # render matplotlib figures inline so plots can be shown directly

# Draw an array from a normal distribution with mean 0 and standard deviation 1, shape (num_examples, num_inputs)
nd.random.normal(scale=1, shape=(num_examples, num_inputs))
# Like return, but the function remembers its state and resumes on the next call: a generator
yield features.take(j), labels.take(j)
# Shuffle a list in place
random.shuffle(list)
# First argument: x values; second: y values (both vectors of equal length); third: marker size
plt.scatter(features[:, 1].asnumpy(), labels.asnumpy(), 1);
# Arithmetic mean
train_l.mean()

With a learning rate of 0.05 the loss decreases as
epoch 1, loss 0.000437
epoch 2, loss 0.000053
epoch 3, loss 0.000053
With a learning rate of 0.03 the loss decreases as
epoch 1, loss 0.042622
epoch 2, loss 0.000177
epoch 3, loss 0.000050
With a learning rate of 0.01 the loss decreases as
epoch 1, loss 2.305500
epoch 2, loss 0.332681
epoch 3, loss 0.048155

from mxnet.gluon import data as gdata
batch_size = 10
# 1. Combine the training features and labels
dataset = gdata.ArrayDataset(features, labels)
# 2. Read a random mini-batch of data
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
from mxnet.gluon import nn
# 3. Define the model. A Sequential instance is a container that chains layers together.
net = nn.Sequential()
# 4. Define the output layer: a fully connected Dense layer with 1 output.
net.add(nn.Dense(1))
from mxnet import init
# 5. Initialize the parameters. Each weight is drawn from a normal distribution with mean 0
#    and a small standard deviation; the bias b defaults to zero.
net.initialize(init.Normal(sigma=0.001))
from mxnet.gluon import loss as gloss
# 6. Define the loss function
loss = gloss.L2Loss()  # squared loss, also called L2-norm loss
from mxnet import gluon
# 7. Define the optimizer: collect the parameters automatically, use sgd with a learning-rate hyperparameter
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})
# 8. Train
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)      # loss on this mini-batch
        l.backward()                 # back-propagate the gradients
        trainer.step(batch_size)     # tell the trainer the batch size for this step
    l = loss(net(features), labels)  # loss on the whole training set, with w and b already updated
    print('epoch %d, loss: %f' % (epoch, l.mean().asnumpy()))
dense = net[0]               # get the layer from net
true_w, dense.weight.data()  # compare the true weights with the learned ones
true_b, dense.bias.data()    # compare the true bias with the learned one

# softmax function
def softmax(X):
X_exp = X.exp()
partition = X_exp.sum(axis=1, keepdims=True)
return X_exp / partition # 这里应用了广播机制
# 评估模型net在数据集data_iter上的准确率
def evaluate_accuracy(data_iter, net):
acc_sum, n = 0.0, 0
for X, y in data_iter: # X为featureS,y为labelS
y = y.astype('float32')
acc_sum += (net(X).argmax(axis=1) == y).sum().asscalar()
n += y.size
return acc_sum / n
# Cross-entropy loss
def cross_entropy(y_hat, y):
    return -nd.pick(y_hat, y).log()

epoch 1, loss 0.7705, train acc 0.714, test acc 0.834
epoch 2, loss 0.4853, train acc 0.820, test acc 0.857
epoch 3, loss 0.4198, train acc 0.845, test acc 0.851
epoch 4, loss 0.3873, train acc 0.857, test acc 0.871
epoch 5, loss 0.3662, train acc 0.863, test acc 0.873

epoch 1, loss 0.8059, train acc 0.700, test acc 0.802
epoch 2, loss 0.4902, train acc 0.820, test acc 0.854
epoch 3, loss 0.4278, train acc 0.842, test acc 0.862
epoch 4, loss 0.3975, train acc 0.853, test acc 0.865
epoch 5, loss 0.3822, train acc 0.860, test acc 0.865

epoch 1, loss 0.8118, train acc 0.699, test acc 0.827
epoch 2, loss 0.4928, train acc 0.817, test acc 0.849
epoch 3, loss 0.4344, train acc 0.840, test acc 0.847
epoch 4, loss 0.4024, train acc 0.852, test acc 0.862
epoch 5, loss 0.3769, train acc 0.860, test acc 0.868

epoch 1, loss 1.2498, train acc 0.519, test acc 0.732
epoch 2, loss 0.5695, train acc 0.786, test acc 0.804
epoch 3, loss 0.4659, train acc 0.827, test acc 0.854
epoch 4, loss 0.4311, train acc 0.841, test acc 0.858
epoch 5, loss 0.3923, train acc 0.855, test acc 0.857

# Code modified to add extra hidden layers
num_inputs, num_outputs, num_hiddens,num_hiddens2 = 784, 10, 256,256
W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens))
b1 = nd.zeros(num_hiddens)
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens, num_hiddens2))
b2 = nd.zeros(num_hiddens2)
W3 = nd.random.normal(scale=0.01, shape=(num_hiddens2, num_outputs))
b3 = nd.zeros(num_outputs)
params = [W1, b1, W2, b2, W3, b3]
for param in params:
param.attach_grad()
def net(X):
X = X.reshape((-1, num_inputs)) # -1参数说明根据另外的参数来计算该参数的实际值
Y = relu(nd.dot(X, W1) + b1)
Y = Y.reshape((-1, num_hiddens))
H = relu(nd.dot(Y, W2) + b2)
    return nd.dot(H, W3) + b3

net.add(nn.Dense(256, activation='relu'), nn.Dense(256, activation='relu'), nn.Dense(256, activation='relu'),
        nn.Dense(10))

epoch 1, loss 1.9463, train acc 0.221, test acc 0.497
epoch 2, loss 0.9294, train acc 0.633, test acc 0.715
epoch 3, loss 0.6073, train acc 0.769, test acc 0.781
epoch 4, loss 0.5036, train acc 0.809, test acc 0.840
epoch 5, loss 0.4902, train acc 0.820, test acc 0.834

lr, num_epochs = 0.95, 7
—————————————————————————— results ————————————————————————————————————
training on gpu(0)
epoch 1, loss 1.0864, train acc 0.577, test acc 0.778, time 4.3 sec
epoch 2, loss 0.5540, train acc 0.782, test acc 0.826, time 4.3 sec
epoch 3, loss 0.4597, train acc 0.828, test acc 0.850, time 4.3 sec
epoch 4, loss 0.4063, train acc 0.849, test acc 0.862, time 4.3 sec
epoch 5, loss 0.3754, train acc 0.860, test acc 0.872, time 4.3 sec
epoch 6, loss 0.3539, train acc 0.868, test acc 0.877, time 4.3 sec
epoch 7, loss 0.3320, train acc 0.876, test acc 0.881, time 4.3 sec
———————————————— output shape of every AlexNet layer ————————————————
conv0 output shape: (1, 96, 54, 54)
pool0 output shape: (1, 96, 26, 26)
conv1 output shape: (1, 256, 26, 26)
pool1 output shape: (1, 256, 12, 12)
conv2 output shape: (1, 384, 12, 12)
conv3 output shape: (1, 384, 12, 12)
conv4 output shape: (1, 256, 12, 12)
pool2 output shape: (1, 256, 5, 5)
dense0 output shape: (1, 4096)
dropout0 output shape: (1, 4096)
dense1 output shape: (1, 4096)
dropout1 output shape: (1, 4096)
dense2 output shape: (1, 10)
———————————————— output shape of every VGG layer ————————————————
sequential1 output shape: (1, 64, 112, 112)
sequential2 output shape: (1, 128, 56, 56)
sequential3 output shape: (1, 256, 28, 28)
sequential4 output shape: (1, 512, 14, 14)
sequential5 output shape: (1, 512, 7, 7)
dense0 output shape: (1, 4096)
dropout0 output shape: (1, 4096)
dense1 output shape: (1, 4096)
dropout1 output shape: (1, 4096)
dense2 output shape: (1, 10)training on gpu(0)
epoch 1, loss 1.9254, train acc 0.283, test acc 0.498, time 121.0 sec
epoch 2, loss 0.9495, train acc 0.651, test acc 0.747, time 118.4 sec
epoch 3, loss 0.6075, train acc 0.774, test acc 0.812, time 120.9 sec
epoch 4, loss 0.4930, train acc 0.818, test acc 0.856, time 120.1 sec
epoch 5, loss 0.4341, train acc 0.840, test acc 0.853, time 119.4 sec
epoch 6, loss 0.3901, train acc 0.857, test acc 0.870, time 120.3 sec
epoch 7, loss 0.3619, train acc 0.868, test acc 0.884, time 119.5 sec
epoch 8, loss 0.3398, train acc 0.876, test acc 0.892, time 121.4 sec

pool2d = nn.MaxPool2D((2, 3), padding=(1, 2), strides=(2, 3))  # 2-D max-pooling layer
pool2d(X)  # call it

net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
        # The pooling window and the stride have the same shape, so the regions covered by
        # successive positions of the window do not overlap.
        nn.MaxPool2D(pool_size=2, strides=2),
        # Increase the output channels so the two convolutional layers have a similar parameter size.
        nn.Conv2D(channels=10, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        # Dense flattens an input of shape (batch, channels, height, width) into (batch, channels*height*width),
        # i.e. the fully connected block flattens each example in the mini-batch.
        nn.Dense(120, activation='sigmoid'),
        nn.Dense(84, activation='sigmoid'),
        nn.Dense(10))

# Use a large 11 x 11 window to capture objects, with stride 4 to shrink the output height and width
# substantially; the number of output channels here is also much larger than in LeNet
net.add(nn.Conv2D(96, kernel_size=11, strides=4, activation='relu'),
nn.MaxPool2D(pool_size=3, strides=2),
# 减小卷积窗口,使用填充为2来使得输入与输出的高和宽一致,且增大输出通道数
nn.Conv2D(256, kernel_size=5, padding=2, activation='relu'),
nn.MaxPool2D(pool_size=3, strides=2),
# 连续3个卷积层,且使用更小的卷积窗口。除了最后的卷积层外,进一步增大了输出通道数。
# 前两个卷积层后不使用池化层来减小输入的高和宽
nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
nn.Conv2D(256, kernel_size=3, padding=1, activation='relu'),
nn.MaxPool2D(pool_size=3, strides=2),
# 这里全连接层的输出个数比LeNet中的大数倍。使用丢弃层来缓解过拟合
nn.Dense(4096, activation="relu"), nn.Dropout(0.5),
nn.Dense(4096, activation="relu"), nn.Dropout(0.5),
# 输出层。由于这里使用Fashion-MNIST,所以用类别数为10,而非论文中的1000
nn.Dense(10))import d2lzh as d2l
from mxnet import gluon, init, nd
from mxnet.gluon import nn
# vgg_block takes the number of convolutional layers num_convs and the number of output channels num_channels
def vgg_block(num_convs, num_channels):
blk = nn.Sequential()
for _ in range(num_convs):
blk.add(nn.Conv2D(num_channels, kernel_size=3,
padding=1, activation='relu'))
blk.add(nn.MaxPool2D(pool_size=2, strides=2))
    return blk
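A usage sketch: stacking vgg_block according to a (num_convs, num_channels) architecture spec gives VGG-11; this mirrors the book's construction, with the standard conv_arch values assumed.

conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))

def vgg(conv_arch):
    net = nn.Sequential()
    for num_convs, num_channels in conv_arch:   # the convolutional part
        net.add(vgg_block(num_convs, num_channels))
    net.add(nn.Dense(4096, activation='relu'), nn.Dropout(0.5),  # the fully connected part
            nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
            nn.Dense(10))
    return net

net = vgg(conv_arch)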
# NiN block: three convolutional layers; the hyperparameters of the first one are configurable
def nin_block(num_channels, kernel_size, strides, padding):
blk = nn.Sequential()
blk.add(nn.Conv2D(num_channels, kernel_size,
strides, padding, activation='relu'),
nn.Conv2D(num_channels, kernel_size=1, activation='relu'),
nn.Conv2D(num_channels, kernel_size=1, activation='relu'))
    return blk

# LeNet with batch normalization, concise (Gluon) implementation
net = nn.Sequential()
net.add(nn.Conv2D(6, kernel_size=5),
nn.BatchNorm(),
nn.Activation('sigmoid'),
nn.MaxPool2D(pool_size=2, strides=2),
nn.Conv2D(16, kernel_size=5),
nn.BatchNorm(),
nn.Activation('sigmoid'),
nn.MaxPool2D(pool_size=2, strides=2),
nn.Dense(120),
nn.BatchNorm(),
nn.Activation('sigmoid'),
nn.Dense(84),
nn.BatchNorm(),
nn.Activation('sigmoid'),
        nn.Dense(10))

# Residual block: two 3x3 convolutional layers with the same number of output channels, each followed
# by batch normalization and a ReLU. The input skips these two convolutions and is added back just
# before the final ReLU.
class Residual(nn.Block):
def __init__(self, num_channels, use_1x1conv=False, strides=1 , **kwargs):
super(Residual, self).__init__(**kwargs)
self.conv1 = nn.Conv2D(num_channels, kernel_size=3, padding=1, strides=strides)
self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1)
if use_1x1conv:
self.conv3 = nn.Conv2D(num_channels, kernel_size=1, strides=strides)
else:
self.conv3 = None
self.bn1 = nn.BatchNorm()
self.bn2 = nn.BatchNorm()
def forward(self, X):
Y = nd.relu(self.bn1(self.conv1(X)))
Y = self.bn2(self.conv2(Y))
if self.conv3:
X = self.conv3(X)
        return nd.relu(Y + X)
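A quick shape check (sketch) of the block above: with a 1x1 convolution and stride 2 it halves the spatial size while changing the channel count.

blk = Residual(6, use_1x1conv=True, strides=2)
blk.initialize()
X = nd.random.uniform(shape=(4, 3, 8, 8))
print(blk(X).shape)   # (4, 6, 4, 4)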
import gc
gc.collect()  # free memory

# a coordinate vector
a = np.array([1,2,3])
# another coordinate vector
b = np.array([7,8])
# Return coordinate matrices built from the coordinate vectors:
# a list of two arrays, the first holding the X coordinates of the grid, the second the Y coordinates
res = np.meshgrid(a,b)
# Result: [array([[1,2,3],[1,2,3]]), array([[7,7,7],[8,8,8]])]
# i.e. the grid points (1,7)(2,7)(3,7)(1,8)(2,8)(3,8)

trainer_hyperparams['learning_rate'] /= 10
trainer.set_learning_rate(trainer_hyperparams['learning_rate'])
print(trainer_hyperparams['learning_rate'])

Exercise notes:
- Set the pred_period variable to 1 and watch how an under-trained model (with high perplexity) writes lyrics. What does this suggest?
- The batchify function specifies how the DataLoader builds each mini-batch; the shapes of the variables in the first batch were printed. How are these shapes computed?
- With num_epochs set to 10 for scratch_net, its accuracy in later epochs still does not catch up with finetune_net, which shows the two differ in more than just the number of epochs. The precision question in the exercise is unclear to me.
- See the backward function.
—————————————————————————————— random sampling ——————————————————————————————
epoch 50, perplexity 72.627812, time 0.31 sec
- 分开 我不要再想你 哼哼哈觉截棍 哼哼哈兮截棍 哼哼哈兮截棍 哼哼哈兮截棍 哼哼哈兮截棍 哼哼哈兮截棍
- 不分开 我想想你的溪边河知都的 我想要再想你 哼哼哈觉截棍 哼哼哈兮截棍 哼哼哈兮截棍 哼哼哈兮截棍 哼哼
epoch 100, perplexity 12.504797, time 0.31 sec
- 分开 一直在双截棍 哼哼哈兮 快使用双截棍 哼者哈兮 快使用双截棍 哼哼哈兮 快使用双截棍 哼哼哈兮 快
- 不分开不 我不能让呵牵 如果我遇见你是一场悲剧 我想以让生小就定一个人 干什么让我习糗处可躲就耳我想要你说
epoch 150, perplexity 3.392714, time 0.31 sec
- 分开 有杰伦 一步两步三步四步望著天 看星星 一颗两颗三颗四步望著天 看星星 一颗两颗三颗四颗望著天 看
- 不分开吗 我后你很 在颗心悬在半动 我默悔够远照看 就像是童话故事 就么忙跟武当山 你说林苦武一堡 你说在
epoch 200, perplexity 1.976689, time 0.31 sec
- 分开 一直令它心仪的母斑鸠 牛仔红蕃 在小镇 背在背决斗 一只灰狼 问候村日 一场日痛 你的完空 在小
- 不分开吗 我叫你爸 你打我妈 这样对吗干嘛这样 何必让酒牵鼻子走 瞎 说么一口 你爱完我 说你说 干数怎么
epoch 250, perplexity 1.595479, time 0.31 sec
- 分开 有杰伦经三 谁慢苦习 让我爱上你 那场悲剧 是你完美演出的一场戏 宁愿心碎哭泣 再狠狠忘记 你爱过
- 不分开扫把的胖女巫 用拉丁文念咒语啦啦呜 她养的黑猫笑起来像哭 啦啦啦呜 刻在心动 染底夜空 过去种种 象

my_seq = list(range(30))
for X, Y in data_iter_random(my_seq, batch_size=2, num_steps=6):
print('X: ', X, '\nY:', Y, '\n')
——————————————————————输出————————————————————————
X:
[[ 0. 1. 2. 3. 4. 5.]
[18. 19. 20. 21. 22. 23.]]
<NDArray 2x6 @cpu(0)>
Y:
[[ 1. 2. 3. 4. 5. 6.]
[19. 20. 21. 22. 23. 24.]]
<NDArray 2x6 @cpu(0)>
X:
[[ 6. 7. 8. 9. 10. 11.]
[12. 13. 14. 15. 16. 17.]]
<NDArray 2x6 @cpu(0)>
Y:
[[ 7. 8. 9. 10. 11. 12.]
[13. 14. 15. 16. 17. 18.]]
<NDArray 2x6 @cpu(0)>for X, Y in data_iter_consecutive(my_seq, batch_size=2, num_steps=6):
print('X: ', X, '\nY:', Y, '\n')
——————————————————————输出————————————————————————
X:
[[ 0. 1. 2. 3. 4. 5.]
[15. 16. 17. 18. 19. 20.]]
<NDArray 2x6 @cpu(0)>
Y:
[[ 1. 2. 3. 4. 5. 6.]
[16. 17. 18. 19. 20. 21.]]
<NDArray 2x6 @cpu(0)>
X:
[[ 6. 7. 8. 9. 10. 11.]
[21. 22. 23. 24. 25. 26.]]
<NDArray 2x6 @cpu(0)>
Y:
[[ 7. 8. 9. 10. 11. 12.]
[22. 23. 24. 25. 26. 27.]]
<NDArray 2x6 @cpu(0)># 定义参数形状
def get_params():
def _one(shape):
return nd.random.normal(scale=0.01, shape=shape, ctx=ctx)
# 隐藏层参数
W_xh_1 = _one((num_inputs, num_hiddens))
W_hh_1 = _one((num_hiddens, num_hiddens))
b_h_1 = nd.zeros(num_hiddens, ctx=ctx)
# 注意这里的shape和前面不同
W_xh_2 = _one((num_hiddens, num_hiddens))
W_hh_2 = _one((num_hiddens, num_hiddens))
b_h_2 = nd.zeros(num_hiddens, ctx=ctx)
# 输出层参数
W_hq = _one((num_hiddens, num_outputs))
b_q = nd.zeros(num_outputs, ctx=ctx)
# 附上梯度
params = [W_xh_1, W_hh_1, b_h_1, W_xh_2, W_hh_2, b_h_2, W_hq, b_q]
for param in params:
param.attach_grad()
return params
# 返回初始化的隐藏状态
def init_rnn_state(batch_size, num_hiddens, ctx):
return (nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx),
nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx))
# 循环神经网络模型,tanh激活函数
def rnn(inputs, state, params):
# inputs和outputs皆为num_steps个形状为(batch_size, vocab_size)的矩阵
W_xh_1, W_hh_1, b_h_1, W_xh_2, W_hh_2, b_h_2, W_hq, b_q = params
H_1,H_2 = state
outputs = []
for X in inputs:
H_1 = nd.relu(nd.dot(X, W_xh_1) + nd.dot(H_1, W_hh_1) + b_h_1)
H_2 = nd.relu(nd.dot(H_1, W_xh_2) + nd.dot(H_2, W_hh_2) + b_h_2)
Y = nd.dot(H_2, W_hq) + b_q
outputs.append(Y)
    return outputs, (H_1,H_2)

get_similar_tokens('love', 3, net[0])
———————————————————————————————— output ————————————————————————
cosine sim=0.667: thieves
cosine sim=0.657: gut
cosine sim=0.654: catching
———————————————————————————————————————————————————————————
get_similar_tokens('the', 3, net[0])
————————————————————————————————输出————————————————————————
cosine sim=0.495: pravda
cosine sim=0.489: avoided
cosine sim=0.458: grossly

# Subsampling: discard the word when this threshold exceeds a random number
def discard(idx):
return random.uniform(0, 1) < 1 - math.sqrt(
        1e-4 / counter[idx_to_token[idx]] * num_tokens)

# Each time, uniformly sample an integer between 1 and max_window_size as the context window size.
def get_centers_and_contexts(dataset, max_window_size):
centers, contexts = [], []
for st in dataset:
if len(st) < 2: # 每个句子至少要有2个词才可能组成一对“中心词-背景词”
continue
centers += st
for center_i in range(len(st)):
window_size = random.randint(1, max_window_size)
indices = list(range(max(0, center_i - window_size),
min(len(st), center_i + 1 + window_size)))
indices.remove(center_i) # 将中心词排除在背景词之外
contexts.append([st[idx] for idx in indices])
return centers, contexts# 从背景词中根据权重随机制造噪音,生成负采样
def get_negatives(all_contexts, sampling_weights, K):
all_negatives, neg_candidates, i = [], [], 0
pupulation = list(range(len(sampling_weights)))
for contexts in all_contexts:
negatives = []
while len(negatives) < len(contexts) * K:
# 根据每个词的权重随机生成K个词的索引作为噪声词,每次生成1e5,不够继续生成
if i == len(neg_candidates):
i, neg_candidates = 0, random.choices(pupulation, sampling_weights, k = int(1e5))
neg, i = neg_candidates[i], i+1
# 当噪声词不为背景词时才可以使用
if neg not in set(contexts):
negatives.append(neg)
all_negatives.append(negatives)
return all_negativesnet.add(nn.Embedding(input_dim=len(idx_to_token), output_dim=embed_size, sparse_grad = True),nn.Embedding(input_dim=len(idx_to_token), output_dim=embed_size, sparse_grad = True))get_analogy('父亲', '母亲', '爷爷', zh)
——————————————————————输出——————————————————
'爷爷'import numpy as np
def softmax(X): #softmax函数
return np.exp(X) / np.sum(np.exp(X))
test = [[1,2,3],[2,4,6]]
softmax(test)
——————————————————输出————————————————————
array([[0.00548473, 0.01490905, 0.04052699],
       [0.01490905, 0.11016379, 0.8140064 ]])
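Note that this version normalizes over the whole array, which is why all six entries above sum to 1 jointly. For a row-wise softmax (each row summing to 1) the sum has to be taken per row; a sketch, reusing the test list defined above:

def softmax_rows(X):
    X = np.asarray(X, dtype=float)
    X_exp = np.exp(X - X.max(axis=1, keepdims=True))  # subtract the row max for numerical stability
    return X_exp / X_exp.sum(axis=1, keepdims=True)

softmax_rows(test)   # each row now sums to 1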
def gru(inputs, state, params):
    W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q = params
H, = state
outputs = []
for X in inputs:
Z = nd.sigmoid(nd.dot(X, W_xz) + nd.dot(H, W_hz) + b_z)
R = nd.sigmoid(nd.dot(X, W_xr) + nd.dot(H, W_hr) + b_r)
H_tilda = nd.tanh(nd.dot(X, W_xh) + nd.dot(R * H, W_hh) + b_h)
H = Z * H + (1 - Z) * H_tilda
Y = nd.dot(H, W_hq) + b_q
outputs.append(Y)
    return outputs, (H,)

def begin_state(self, enc_state):
    # use the encoder's hidden state at its final time step directly as the decoder's initial hidden state
    return enc_state

dec_input = dec_output

centers shape: (512, 1)
contexts_negatives shape: (512, 60)
masks shape: (512, 60)
labels shape: (512, 60)
Computing the maximum length: max_len = max(len(c) + len(n) for _, c, n in data),
where c is the list of context words in the batch and n the noise words; their lengths are added and the maximum is taken.

# The dimensionality of the pretrained word vectors must match the embedding layer's output size embed_size
embed_size, num_hiddens, num_layers, ctx = 300, 100, 2, d2l.try_all_gpus()
# load the word vectors
glove_embedding = text.embedding.create(
'glove', pretrained_file_name='glove.6B.300d.txt', vocabulary=vocab)finetune_net.features.collect_params().setattr('grad_req', 'null')
————————————————输出——————————————————
training on [gpu(0)]
epoch 1, loss 0.4164, train acc 0.824, test acc 0.849, time 13.2 sec
epoch 2, loss 0.4104, train acc 0.820, test acc 0.848, time 13.3 sec
epoch 3, loss 0.4065, train acc 0.812, test acc 0.849, time 13.1 sec
epoch 4, loss 0.3911, train acc 0.820, test acc 0.850, time 13.1 sec
epoch 5, loss 0.3945, train acc 0.822, test acc 0.846, time 13.0 sectrain_with_data_aug(no_aug, no_aug)
——————————————————————————————结果——————————————————————
training on [gpu(0)]
epoch 1, loss 1.3485, train acc 0.522, test acc 0.556, time 62.9 sec
epoch 2, loss 0.7872, train acc 0.722, test acc 0.705, time 65.0 sec
epoch 3, loss 0.5654, train acc 0.802, test acc 0.738, time 67.0 sec
epoch 4, loss 0.4175, train acc 0.853, test acc 0.777, time 67.9 sec
epoch 5, loss 0.3043, train acc 0.895, test acc 0.789, time 67.8 sec
epoch 6, loss 0.2183, train acc 0.923, test acc 0.799, time 68.2 sec
epoch 7, loss 0.1547, train acc 0.946, test acc 0.810, time 68.5 sec
epoch 8, loss 0.1150, train acc 0.960, test acc 0.799, time 68.9 sec
epoch 9, loss 0.0814, train acc 0.972, test acc 0.809, time 69.1 sec
epoch 10, loss 0.0725, train acc 0.974, test acc 0.806, time 70.2 seccomplex_aug = gdata.vision.transforms.Compose([
gdata.vision.transforms.RandomFlipLeftRight(),
gdata.vision.transforms.RandomHue(0.5),
gdata.vision.transforms.ToTensor()])
train_with_data_aug(complex_aug, no_aug)
————————————————————————————————结果————————————————————————
training on [gpu(0)]
epoch 1, loss 1.5822, train acc 0.446, test acc 0.496, time 69.3 sec
epoch 2, loss 0.9240, train acc 0.673, test acc 0.676, time 68.4 sec
epoch 3, loss 0.6791, train acc 0.764, test acc 0.739, time 68.6 sec
epoch 4, loss 0.5490, train acc 0.810, test acc 0.728, time 69.8 sec
epoch 5, loss 0.4555, train acc 0.842, test acc 0.777, time 70.5 sec
epoch 6, loss 0.3836, train acc 0.868, test acc 0.762, time 70.2 sec
epoch 7, loss 0.3227, train acc 0.889, test acc 0.795, time 69.8 sec
epoch 8, loss 0.2728, train acc 0.906, test acc 0.807, time 70.0 sec
epoch 9, loss 0.2392, train acc 0.918, test acc 0.823, time 70.8 sec
epoch 10, loss 0.1931, train acc 0.934, test acc 0.820, time 70.1 secpretrained_net = model_zoo.vision.resnet18_v2(pretrained=True)
finetune_net = model_zoo.vision.resnet18_v2(classes=2)
finetune_net.features = pretrained_net.features
finetune_net.output.initialize(init.Xavier())
# output中的模型参数将在迭代中使用10倍大的学习率
finetune_net.output.collect_params().setattr('lr_mult', 10)
def train_fine_tuning(net, learning_rate, batch_size=128, num_epochs=5):
train_iter = gdata.DataLoader(
train_imgs.transform_first(train_augs), batch_size, shuffle=True)
test_iter = gdata.DataLoader(
test_imgs.transform_first(test_augs), batch_size)
ctx = d2l.try_all_gpus()
net.collect_params().reset_ctx(ctx)
net.hybridize()
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
'learning_rate': learning_rate, 'wd': 0.001})
    d2l.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs)

# bbox is short for bounding box:
# (x, y) of the upper-left corner, (x, y) of the lower-right corner
dog_bbox, cat_bbox = [60, 45, 378, 516], [400, 112, 655, 493]
def bbox_to_rect(bbox, color):  # this function is saved in the d2lzh package for later use
    # Convert the (upper-left x, upper-left y, lower-right x, lower-right y) bounding-box format
    # into the matplotlib format: ((upper-left x, upper-left y), width, height)
return d2l.plt.Rectangle(
xy=(bbox[0], bbox[1]), width=bbox[2]-bbox[0], height=bbox[3]-bbox[1],
fill=False, edgecolor=color, linewidth=2)
fig = d2l.plt.imshow(img)
# axes是坐标轴
fig.axes.add_patch(bbox_to_rect(dog_bbox, 'blue'))
fig.axes.add_patch(bbox_to_rect(cat_bbox, 'red'));img = image.imread('../img/catdog.jpg').asnumpy()
h, w = img.shape[0:2]                      # height, width
X = nd.random.uniform(shape=(1, 3, h, w))  # construct example input data
# The generated anchor variable Y has shape (batch size, number of anchors, 4);
# the number of anchors is w*h*(n+m-1), where n = len(sizes) and m = len(ratios),
# and the 4 values are the upper-left and lower-right coordinates
Y = contrib.nd.MultiBoxPrior(X, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5])
# Reshape to (image height, image width, anchors centred on the same pixel, 4),
# so all anchors centred on a given pixel can be looked up by its position
boxes = Y.reshape((h, w, 5, 4))
boxes[250, 250, 0, :]

ground_truth = nd.array([[0, 0.1, 0.08, 0.52, 0.92],
[1, 0.55, 0.2, 0.9, 0.88]])
anchors = nd.array([[0, 0.1, 0.2, 0.3], [0.15, 0.2, 0.4, 0.4],
[0.63, 0.05, 0.88, 0.98], [0.66, 0.45, 0.8, 0.8],
[0.57, 0.3, 0.92, 0.9]])
# Label classes and offsets for the anchor boxes; anchors whose IoU with every ground-truth
# box is below the threshold (default 0.5) are labelled as background
labels = contrib.nd.MultiBoxTarget(anchors.expand_dims(axis=0),       # (1, 5, 4)
                                   ground_truth.expand_dims(axis=0),  # (1, 2, 5)
                                   nd.zeros((1, 3, 5)))               # (batch, classes, anchors)
labels[2]  # the third returned item holds the labelled class of each anchor
# The second item is the mask variable, with shape (batch size, 4 * number of anchors);
labels[1]  # its zeros filter out the offsets of negative-class anchors before computing the objective.
# The first item holds the four labelled offsets of each anchor; offsets of negative anchors are labelled 0.
labels[0]

# Non-maximum suppression can remove similar predicted bounding boxes
anchors = nd.array([[0.1, 0.08, 0.52, 0.92], [0.08, 0.2, 0.56, 0.95],
[0.15, 0.3, 0.62, 0.91], [0.55, 0.2, 0.9, 0.88]])
# Assume all predicted offsets are 0: the predicted bounding boxes are then the anchors themselves
offset_preds = nd.array([0] * anchors.size)
cls_probs = nd.array([[0] * 4,               # predicted probabilities for background
                      [0.9, 0.8, 0.7, 0.1],  # predicted probabilities for dog
                      [0.1, 0.2, 0.3, 0.9]]) # predicted probabilities for cat
# The MultiBoxDetection function performs non-maximum suppression, here with threshold 0.5
output = contrib.ndarray.MultiBoxDetection(
    cls_probs.expand_dims(axis=0), offset_preds.expand_dims(axis=0),
    anchors.expand_dims(axis=0), nms_threshold=0.5)
output
# Class 0 is dog, 1 is cat; -1 marks background or boxes removed by non-maximum suppression.
# The second element of each row is the confidence of the predicted bounding box.
—————————————— output ——————————————
[[[ 0. 0.9 0.1 0.08 0.52 0.92]
[ 1. 0.9 0.55 0.2 0.9 0.88]
[-1. 0.8 0.08 0.2 0.56 0.95]
[-1. 0.7 0.15 0.3 0.62 0.91]]]
<NDArray 1x4x6 @cpu(0)>
—————————————根据矩阵输出图形——————————————
# 除掉类别为-1的预测边界框,并可视化非极大值抑制保留的结果。
fig = d2l.plt.imshow(img)
for i in output[0].asnumpy():
if i[0] == -1:
continue
label = ('dog=', 'cat=')[int(i[0])] + str(i[1])
show_bboxes(fig.axes, [nd.array(i[2:]) * bbox_scale], label)# 将锚框变量y的形状变为(图像高,图像宽,以相同像素为中心的锚框个数,4)
boxes = Y.reshape((h, w, 5, 4))# 在任一图像上均匀采样fmap_h行fmap_w列个像素,并分别以它们为中心
# 生成大小为s(假设列表s长度为1)的不同宽高比(ratios)的锚框。
def display_anchors(fmap_w, fmap_h, s):
fmap = nd.zeros((1, 10, fmap_w, fmap_h)) # 前两维的取值不影响输出结果
anchors = contrib.nd.MultiBoxPrior(fmap, sizes=s, ratios=[1, 2, 0.5])
bbox_scale = nd.array((w, h, w, h))
d2l.show_bboxes(d2l.plt.imshow(img.asnumpy()).axes,
anchors[0] * bbox_scale)
display_anchors(fmap_w=4, fmap_h=4, s=[0.15])imgs = (batch.data[0][0:10].transpose((0, 2, 3, 1))) / 255
# 将10*3*256*256的数组变为10*256*256*3的数组再除以255# 本函数已保存在d2lzh包中方便以后使用
# 返回num_rows, num_cols的坐标轴。
def show_images(imgs, num_rows, num_cols, scale=2):
figsize = (num_cols * scale, num_rows * scale)
_, axes = d2l.plt.subplots(num_rows, num_cols, figsize=figsize)
for i in range(num_rows):
for j in range(num_cols):
axes[i][j].imshow(imgs[i * num_cols + j].asnumpy())
axes[i][j].axes.get_xaxis().set_visible(False)
axes[i][j].axes.get_yaxis().set_visible(False)
return axes
axes = d2l.show_images(imgs, 2, 5).flatten()

>>> a = np.array([[1,2], [3,4]])
>>> a.flatten()     # the default order is "C", i.e. row by row
array([1, 2, 3, 4])
>>> a.flatten('F')  # column by column
array([1, 3, 2, 4])

# 0. Bounding-box prediction layer: 4 offsets per anchor box
def bbox_predictor(num_anchors):
return nn.Conv2D(num_anchors * 4, kernel_size=3, padding=1)
# 0.类别预测层
def cls_predictor(num_anchors, num_classes):
return nn.Conv2D(num_anchors * (num_classes + 1), kernel_size=3,
padding=1)
# 0.转换维度后扁平化
def flatten_pred(pred):
return pred.transpose((0, 2, 3, 1)).flatten()
# 0.多尺度连结预测结果。
# 将预测结果转化为(批量大小, 高 × 宽 × 通道数),之后在维度1上连结
def concat_preds(preds):
return nd.concat(*[flatten_pred(p) for p in preds], dim=1)
# 1.基础网络块用来从原始图像中抽取特征。
# 该网络串联3个高和宽减半块,并逐步将通道数翻倍。
def base_net():
blk = nn.Sequential()
for num_filters in [16, 32, 64]:
blk.add(down_sample_blk(num_filters))
return blk
# 2.宽高减半块,需要先于基础网络块定义
# 宽高减半,可以改变通道数,每个感受野6*6
def down_sample_blk(num_channels):
blk = nn.Sequential()
for _ in range(2):
blk.add(nn.Conv2D(num_channels, kernel_size=3, padding=1),
nn.BatchNorm(in_channels=num_channels), # 批量归一化
nn.Activation('relu'))
blk.add(nn.MaxPool2D(2))
return blk
# 3.全局最大池化层
# 4.完整SSD模型
def get_blk(i):
if i == 0:
blk = base_net()
elif i == 4:
blk = nn.GlobalMaxPool2D()
else:
blk = down_sample_blk(128)
return blk
# 5.前向计算,返回(特征图Y,锚框anchors,预测类别,预测偏移量)
def blk_forward(X, blk, size, ratio, cls_predictor, bbox_predictor):
Y = blk(X)
anchors = contrib.ndarray.MultiBoxPrior(Y, sizes=size, ratios=ratio)
cls_preds = cls_predictor(Y)
bbox_preds = bbox_predictor(Y)
return (Y, anchors, cls_preds, bbox_preds)
# 6.定义大小,宽高比,锚框数
sizes = [[0.2, 0.272], [0.37, 0.447], [0.54, 0.619], [0.71, 0.79],
[0.88, 0.961]]
ratios = [[1, 2, 0.5]] * 5
num_anchors = len(sizes[0]) + len(ratios[0]) - 1
# 7.定义模型类
class TinySSD(nn.Block):
def __init__(self, num_classes, **kwargs):
super(TinySSD, self).__init__(**kwargs)
self.num_classes = num_classes
# 设置每层的神经网络层,类别预测函数,偏移预测函数
for i in range(5):
# 即赋值语句self.blk_i = get_blk(i)
setattr(self, 'blk_%d' % i, get_blk(i))
setattr(self, 'cls_%d' % i, cls_predictor(num_anchors,num_classes))
setattr(self, 'bbox_%d' % i, bbox_predictor(num_anchors))
def forward(self, X):
anchors, cls_preds, bbox_preds = [None] * 5, [None] * 5, [None] * 5
# 计算每层的(特征图Y,锚框anchors,预测类别,预测偏移量)
for i in range(5):
# getattr(self, 'blk_%d' % i)即访问self.blk_i
X, anchors[i],
cls_preds[i],
bbox_preds[i] = blk_forward(X,getattr(self, 'blk_%d' % i),
sizes[i], ratios[i],
getattr(self, 'cls_%d' % i),
getattr(self, 'bbox_%d' % i))
# reshape函数中的0表示保持批量大小不变
return (nd.concat(*anchors, dim=1),
concat_preds(cls_preds).reshape((0, -1, self.num_classes + 1)),
concat_preds(bbox_preds))
# 8.定义损失函数
# 有关锚框类别的损失,交叉熵损失函数
cls_loss = gloss.SoftmaxCrossEntropyLoss()
# 正类锚框偏移量的损失,L1 范数损失,即预测值与真实值之间差的绝对值。
bbox_loss = gloss.L1Loss()
# 总的损失函数
def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
cls = cls_loss(cls_preds, cls_labels)
bbox = bbox_loss(bbox_preds * bbox_masks, bbox_labels * bbox_masks)
return cls + bbox
# 沿用准确率评价分类结果
def cls_eval(cls_preds, cls_labels):
# 由于类别预测结果放在最后一维,argmax需要指定最后一维
return (cls_preds.argmax(axis=-1) == cls_labels).sum().asscalar()
# L1 范数损失,我们用平均绝对误差评价边界框的预测结果。
def bbox_eval(bbox_preds, bbox_labels, bbox_masks):
return ((bbox_labels - bbox_preds) * bbox_masks).abs().sum().asscalar()
# 9.训练模型
for epoch in range(20):
acc_sum, mae_sum, n, m = 0.0, 0.0, 0, 0
train_iter.reset() # 从头读取数据
start = time.time()
for batch in train_iter:
X = batch.data[0].as_in_context(ctx)
Y = batch.label[0].as_in_context(ctx)
with autograd.record():
# 生成多尺度的锚框,为每个锚框预测类别和偏移量
anchors, cls_preds, bbox_preds = net(X)
# 为每个锚框标注类别和偏移量
bbox_labels, bbox_masks, cls_labels = contrib.nd.MultiBoxTarget(
anchors, Y, cls_preds.transpose((0, 2, 1)))
# 根据类别和偏移量的预测和标注值计算损失函数
l = calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels,
bbox_masks)
l.backward()
trainer.step(batch_size)
acc_sum += cls_eval(cls_preds, cls_labels)
n += cls_labels.size
mae_sum += bbox_eval(bbox_preds, bbox_labels, bbox_masks)
m += bbox_labels.size
if (epoch + 1) % 5 == 0:
print('epoch %2d, class err %.2e, bbox mae %.2e, time %.1f sec' % (
            epoch + 1, 1 - acc_sum / n, mae_sum / m, time.time() - start))

def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
cls = cls_loss(cls_preds, cls_labels)
# bbox = bbox_loss(bbox_preds * bbox_masks, bbox_labels * bbox_masks)
    bbox = nd.smooth_l1(bbox_preds * bbox_masks - bbox_labels * bbox_masks, scale=0.3).mean(axis=1)  # feed the prediction-label difference as x into the smooth L1 function
    return cls + bbox

# 1. Focal loss definition; x is the predicted probability of the true class j
def focal_loss(gamma, x):
return -(1 - x) ** gamma * x.log()
# 2.softmax函数,转化为概率。
def softmax(X):
X_exp = X.exp()
partition = X_exp.sum(axis=1, keepdims=True)
return X_exp / partition
# 3. Total loss
def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
    # This step takes a shortcut: not knowing how to obtain the probabilities directly from
    # cls_preds and cls_labels, it inverts the cross-entropy formula -log(p_i) to recover p_i
    # and feeds that as x into the focal loss. The results do not seem great; no better fix yet.
cls = cls_loss(cls_preds, cls_labels)
cls = focal_loss(1.0 , (-cls).exp())
bbox = nd.smooth_l1(bbox_preds * bbox_masks - bbox_labels * bbox_masks, scale=0.3).mean(axis=1)
    return cls + bbox
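A hedged alternative (sketch, not from the book): compute the per-anchor probability of the labelled class directly with softmax and nd.pick, then apply the focal loss to it, instead of inverting the cross-entropy value.

def focal_cls_loss(cls_preds, cls_labels, gamma=1.0):
    probs = nd.softmax(cls_preds, axis=-1)        # (batch, anchors, classes + 1)
    p_true = nd.pick(probs, cls_labels, axis=-1)  # probability assigned to the labelled class
    return (-(1 - p_true) ** gamma * p_true.log()).mean(axis=1)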
# Region-of-interest (RoI) pooling: pool only over the proposed regions of interest
X = nd.arange(16).reshape((1, 1, 4, 4))
rois = nd.array([[0, 0, 0, 20, 20], [0, 0, 10, 30, 30]])
# Since X's height and width are 0.1 of the image's, the proposal coordinates are first
# scaled by spatial_scale = 0.1, and then the regions of interest are cropped out
nd.ROIPooling(X, rois, pooled_size=(2, 2), spatial_scale=0.1)

# Faster R-CNN
!pip install gluoncv
from matplotlib import pyplot as plt
import gluoncv
from gluoncv import model_zoo, data, utils
# 预训练模型
net = model_zoo.get_model('faster_rcnn_resnet50_v1b_voc', pretrained=True)
# 下载一张图片
im_fname = utils.download('https://github.com/dmlc/web-data/blob/master/' +
'gluoncv/detection/biking.jpg?raw=true',
path='biking.jpg')
# 转换成加载图片
x, orig_img = data.transforms.presets.rcnn.load_test(im_fname)
# 前向计算
box_ids, scores, bboxes = net(x)
# 展示结果
ax = utils.viz.plot_bbox(orig_img, bboxes[0], scores[0], box_ids[0], class_names=net.classes)
plt.show()# 下载voc_pascal数据集,本函数已保存在d2lzh包中方便以后使用
def download_voc_pascal(data_dir='../data'):
voc_dir = os.path.join(data_dir, 'VOCdevkit/VOC2012')
url = ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012'
'/VOCtrainval_11-May-2012.tar')
sha1 = '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'
fname = gutils.download(url, data_dir, sha1_hash=sha1)
with tarfile.open(fname, 'r') as f:
f.extractall(data_dir)
return voc_dir
# 读取voc_pascal数据集的输入图和标签到内存,本函数已保存在d2lzh包中方便以后使用
def read_voc_images(root=voc_dir, is_train=True):
txt_fname = '%s/ImageSets/Segmentation/%s' % (
root, 'train.txt' if is_train else 'val.txt')
with open(txt_fname, 'r') as f:
images = f.read().split()
features, labels = [None] * len(images), [None] * len(images)
for i, fname in enumerate(images):
features[i] = image.imread('%s/JPEGImages/%s.jpg' % (root, fname))
labels[i] = image.imread(
'%s/SegmentationClass/%s.png' % (root, fname))
return features, labels
# 使用数据集
voc_train = VOCSegDataset(True, crop_size, voc_dir, colormap2label)
voc_test = VOCSegDataset(False, crop_size, voc_dir, colormap2label)
batch_size = 64
num_workers = 0 if sys.platform.startswith('win32') else 4
train_iter = gdata.DataLoader(voc_train, batch_size, shuffle=True,
last_batch='discard', num_workers=num_workers)
test_iter = gdata.DataLoader(voc_test, batch_size, last_batch='discard',
num_workers=num_workers)class MyDense(nn.Block):
# units为该层的输出个数,in_units为该层的输入个数
def __init__(self, units, in_units, **kwargs):
super(MyDense, self).__init__(**kwargs)
self.weight = self.params.get('weight', shape=(in_units, units))
self.bias = self.params.get('bias', shape=(units,))
def forward(self, x):
linear = nd.dot(x, self.weight.data()) + self.bias.data()
return nd.relu(linear)
dense = MyDense(units=3, in_units=5)
dense.initialize()
dense(nd.random.uniform(shape=(1000, 5)))

y = x.copyto(mx.gpu())         # copy to the GPU (allocates new memory)
z = x.as_in_context(mx.gpu())  # move to the GPU context

# Multilayer perceptron (MLP)
class MLP(nn.Block):
    # Declare layers with model parameters; here, two fully connected layers
    def __init__(self, **kwargs):
        # Call the constructor of the parent Block class to perform the necessary initialization. This also
        # allows other arguments to be specified when constructing an instance, such as the model
        # parameters params introduced in the section on accessing, initializing and sharing parameters.
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Dense(256, activation='relu')  # hidden layer
        self.output = nn.Dense(10)                      # output layer
    # Define the model's forward computation: how to compute the required output from the input x.
    # No backward function needs to be defined in MLP; autograd generates it automatically.
def forward(self, x):
return self.output(self.hidden(x))
X = nd.random.uniform(shape=(2, 20))
net = MLP()
net.initialize()
net(X)net[0].params, type(net[0].params)
————————————————输出————————————————
(dense0_ (
Parameter dense0_weight (shape=(256, 20), dtype=float32)
Parameter dense0_bias (shape=(256,), dtype=float32)
), mxnet.gluon.parameter.ParameterDict)

net[0].params['dense0_weight'], net[0].weight  # equivalent; the latter is usually more readable
net[0].weight.data()            # access the weight values
net[0].weight.grad()            # access the weight gradients
net[0].bias.data()              # access the bias values
net.collect_params()            # collect all parameters (weights and biases) nested inside net
net.collect_params('.*weight')  # select all weights via a regular expression

# Re-initializing an already initialized model requires force_reinit=True;
# by default only the weights are initialized and the biases are set to zero
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
# 使用Xavier(特殊均值分布)的方法初始化权重
net[0].weight.initialize(init=init.Xavier(), force_reinit=True) ## 自定义初始化
class MyInit(init.Initializer):
# 只需要实现_init_weight这个函数,并将其传入的NDArray修改成初始化的结果。
def _init_weight(self, name, data):
print('Init', name, data.shape)
data[:] = nd.random.uniform(low=-10, high=10, shape=data.shape)
data *= data.abs() >= 5
net.initialize(MyInit(), force_reinit=True)net.add(nn.Dense(8, activation='relu'),
shared,
nn.Dense(8, activation='relu', params=shared.params),
nn.Dense(10))# 通过重新初始化来避免
net.initialize(init=MyInit(), force_reinit=True)
# 第二种情况是我们在创建层的时候指定了它的输入个数
net = nn.Sequential()
net.add(nn.Dense(256, in_units=20, activation='relu'))
net.add(nn.Dense(10, in_units=256))
net.initialize(init=MyInit())# 它使用ReLU函数作为激活函数。其中in_units和units分别代表输入个数和输出个数。
class MyDense(nn.Block):
# units为该层的输出个数,in_units为该层的输入个数
def __init__(self, units, in_units, **kwargs):
super(MyDense, self).__init__(**kwargs)
self.weight = self.params.get('weight', shape=(in_units, units))
self.bias = self.params.get('bias', shape=(units,))
def forward(self, x):
linear = nd.dot(x, self.weight.data()) + self.bias.data()
return nd.relu(linear)
dense = MyDense(units=3, in_units=5)
dense.initialize()
dense(nd.random.uniform(shape=(2, 5))) # 直接使用自定义层做前向计算。
# 嵌套使用
net = nn.Sequential()
net.add(MyDense(8, in_units=64),
MyDense(1, in_units=8))
net.initialize()
net(nd.random.uniform(shape=(2, 64)))

nd.save('filename', params)       # save NDArrays to a file
params = nd.load('filename')      # load them back from the file
net.save_parameters(filename)     # save a model's parameters to a file
net2.load_parameters(filename)    # load parameters from a file

# (Peeked at the answer to exercise 9.1, Image Augmentation.)
# This function is saved in the d2lzh package for later use
def evaluate_accuracy(data_iter, net, ctx=[mx.cpu()]):
if isinstance(ctx, mx.Context):
ctx = [ctx]
acc_sum, n = nd.array([0]), 0
for batch in data_iter:
features, labels, _ = _get_batch(batch, ctx)
for X, y in zip(features, labels):
y = y.astype('float32')
acc_sum += (net(X).argmax(axis=1) == y).sum().copyto(mx.cpu())
n += y.size
acc_sum.wait_to_read()
    return acc_sum.asscalar() / n

# Define the contexts
ctx = [mx.gpu(0), mx.cpu(0)]
# Modified training function: pass in ctx instead of the number of GPUs
def train(ctx, batch_size, lr):
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
print('running on:', ctx)
net.initialize(init=init.Normal(sigma=0.01), ctx=ctx, force_reinit=True)
trainer = gluon.Trainer(
net.collect_params(), 'sgd', {'learning_rate': lr})
loss = gloss.SoftmaxCrossEntropyLoss()
for epoch in range(4):
start = time.time()
for X, y in train_iter:
gpu_Xs = gutils.split_and_load(X, ctx)
gpu_ys = gutils.split_and_load(y, ctx)
with autograd.record():
ls = [loss(net(gpu_X), gpu_y)
for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)]
for l in ls:
l.backward()
trainer.step(batch_size)
nd.waitall()
train_time = time.time() - start
test_acc = d2l.evaluate_accuracy(test_iter, net, ctx[0])
print('epoch %d, time %.1f sec, test acc %.2f' % (
            epoch + 1, train_time, test_acc))

y = compile(prog, '', 'exec')
exec(y)

class HybridNet(nn.HybridBlock):
def __init__(self, **kwargs):
super(HybridNet, self).__init__(**kwargs)
self.hidden = nn.Dense(10)
self.output = nn.Dense(2)
    # Note that this is hybrid_forward, not forward, and it takes F to decide which backend class to use:
    # MXNet has the imperative NDArray class (the default) and the symbolic Symbol class.
def hybrid_forward(self, F, x):
print('F: ', F)
print('x: ', x)
x = F.relu(self.hidden(x))
print('hidden: ', x)
return self.output(x)
net = HybridNet()
net.initialize()
x = nd.random.normal(shape=(1, 4))
net.hybridize() # 能提升性能。
net(x)
net(x)
# Inside hybrid_forward, the same inputs and intermediate outputs all become Symbols after hybridization,
# and later forward passes no longer print anything.
# In-place operations such as a += b and a[:] = a + b must be rewritten as a = a + b.

class Benchmark():  # this class is saved in the d2lzh package for later use
def __init__(self, prefix=None):
self.prefix = prefix + ' ' if prefix else ''
def __enter__(self):
self.start = time.time()
def __exit__(self, *args):
print('%stime: %.4f sec' % (self.prefix, time.time() - self.start))
# 通过这个基准类来测试时间。
with Benchmark('Workloads are queued.'):
x = nd.random.uniform(shape=(2000, 2000))
    y = nd.dot(x, x).sum()

# allreduce: add up the data held on each GPU's memory, then broadcast the sum back to all of them
def allreduce(data):
for i in range(1, len(data)):
data[0][:] += data[i].copyto(data[0].context)
for i in range(1, len(data)):
data[0].copyto(data[i])
# 将data平摊到ctx上
def split_and_load(data, ctx):
n, k = data.shape[0], len(ctx)
m = n // k # 简单起见,假设可以整除
assert m * k == n, '# examples is not divided by # devices.'
    return [data[i * m: (i + 1) * m].as_in_context(ctx[i]) for i in range(k)]

# Mini-batch training on multiple GPUs
def train_batch(X, y, gpu_params, ctx, lr):
# 当ctx包含多块GPU及相应的显存时,将小批量数据样本划分并复制到各个显存上
gpu_Xs, gpu_ys = split_and_load(X, ctx), split_and_load(y, ctx)
with autograd.record(): # 在各块GPU上分别计算损失
ls = [loss(lenet(gpu_X, gpu_W), gpu_y)
for gpu_X, gpu_y, gpu_W in zip(gpu_Xs, gpu_ys, gpu_params)]
for l in ls: # 在各块GPU上分别反向传播
l.backward()
# 把各块显卡的显存上的梯度加起来,然后广播到所有显存上
for i in range(len(gpu_params[0])):
allreduce([gpu_params[c][i].grad for c in range(len(ctx))])
for param in gpu_params: # 在各块显卡的显存上分别更新模型参数
        d2l.sgd(param, lr, X.shape[0])  # the full batch size is used here

# The complete training function
def train(num_gpus, batch_size, lr):
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
ctx = [mx.gpu(i) for i in range(num_gpus)]
print('running on:', ctx)
# 将模型参数复制到num_gpus块显卡的显存上
gpu_params = [get_params(params, c) for c in ctx]
for epoch in range(4):
start = time.time()
for X, y in train_iter:
# 对单个小批量进行多GPU训练
train_batch(X, y, gpu_params, ctx, lr)
nd.waitall()
train_time = time.time() - start
def net(x): # 在gpu(0)上验证模型
return lenet(x, gpu_params[0])
test_acc = d2l.evaluate_accuracy(test_iter, net, ctx[0])
print('epoch %d, time %.1f sec, test acc %.2f'
              % (epoch + 1, train_time, test_acc))

train(num_gpus=1, batch_size=256, lr=0.2)  # just call it with the number of GPUs

training on gpu(0)
epoch 1, loss 1.5553, train acc 0.419, test acc 0.684, time 94.8 sec
epoch 2, loss 2.1125, train acc 0.209, test acc 0.228, time 91.7 sec
epoch 3, loss 1.8953, train acc 0.313, test acc 0.367, time 91.7 sec
epoch 4, loss 1.7361, train acc 0.367, test acc 0.373, time 91.3 sec
epoch 5, loss 1.6190, train acc 0.410, test acc 0.435, time 91.5 sec

# Modify the forward function of the residual block as follows
def forward(self, X):
Y = nd.relu(self.conv1(self.bn1(X)))
Y = self.conv2(self.bn2(Y))
if self.conv3:
X = self.conv3(X)
return nd.relu(Y + X)
————————————Output————————————————————
training on gpu(0)
epoch 1, loss 0.4935, train acc 0.825, test acc 0.896, time 85.4 sec
epoch 2, loss 0.2639, train acc 0.903, test acc 0.905, time 81.2 sec
epoch 3, loss 0.2000, train acc 0.926, test acc 0.915, time 82.9 sec
epoch 4, loss 0.1534, train acc 0.944, test acc 0.909, time 83.3 sec
epoch 5, loss 0.1109, train acc 0.960, test acc 0.882, time 83.2 sec

# Definition for singly-linked list.
class ListNode:
def __init__(self, x):
self.val = x
self.next = None
    # Traversal pattern:
    # while head.next != None:   # or: while head != None:
    #     head = head.next

For ASCII characters, the built-in ord and chr functions are enough:
>>> chr(97)
'a'
>>> ord('a')
97
For Unicode characters in Python 2, use ord to get the code point and unichr to turn a code point back into a character (in Python 3, chr handles all code points):
>>> print ord(u'\u2020')
8224
>>> print repr(unichr(8224))
u'\u2020'

l = [1,2,3]
all(i in l for i in range(1,4))  # check that every value in range(1, 4) appears in l
# all() returns True only if every element of the iterable is truthy; everything except 0, empty containers, None and False counts as truthy.

from collections import Counter
counter = Counter(s)  # count occurrences of each element of s; the result behaves like a dictionary
sorted(counter.items(), key=lambda x: x[0])  # sort the (key, count) pairs by key; the first argument to sorted must be iterable
boolverbose = not boolverbose  # negate a boolean
dictionary = {}  # create a dictionary
dictionary[key] = value  # add or overwrite an entry (dicts have no append method)
dictionary.pop(key)  # remove an entry

# Parameter sharing in Gluon; `shared` is assumed to be a previously defined nn.Dense(8, activation='relu') layer and net an nn.Sequential()
net.add(nn.Dense(8, activation='relu'),
shared,
nn.Dense(8, activation='relu', params=shared.params),
nn.Dense(10))
net[0],net[1],net[2],net[3]
————————————————————Output——————————————————————————
(Dense(None -> 8, Activation(relu)),
Dense(None -> 8, Activation(relu)),
Dense(None -> 8, Activation(relu)),
Dense(None -> 10, linear))

def run(x):
with d2l.Benchmark('Run.'):
list = [nd.dot(x, x) for _ in range(10)]
return list
x_cpu = nd.random.uniform(shape=(20, 20))
x_gpu = nd.random.uniform(shape=(20, 20), ctx=mx.gpu(0))
run(x_cpu)
run(x_gpu)
nd.waitall()
——————————Output——————————————
Run. time: 0.0006 sec
Run. time: 0.0000 sec

# Lowest common ancestor in a binary tree (recursive)
    def lowestCommonAncestor(self, root: TreeNode, p: TreeNode, q: TreeNode) -> TreeNode:
        if not root or root == p or root == q: return root  # found p or q (or ran off the tree)
        left = self.lowestCommonAncestor(root.left, p, q)    # whichever of p/q is found first in the left subtree
        right = self.lowestCommonAncestor(root.right, p, q)  # whichever of p/q is found first in the right subtree
        if not left: return right   # both p and q are in the right subtree, so the answer comes from the right
        if not right: return left   # both p and q are in the left subtree, so the answer comes from the left
        return root                 # p and q are on different sides, so root itself is the LCA

class Solution:
def lowestCommonAncestor(self, root: 'TreeNode', p: 'TreeNode', q: 'TreeNode') -> 'TreeNode':
        # BST version: either p and q lie on the same side of root, or one of them is reached first (root.val == p.val or root.val == q.val)
if not root or root == p or root == q: return root
        if root.val>p.val and root.val>q.val:  # both p and q are in the left subtree
return self.lowestCommonAncestor(root.left,p,q)
        elif root.val<p.val and root.val<q.val:  # both p and q are in the right subtree
return self.lowestCommonAncestor(root.right,p,q)
        else:  # p and q are on different sides, so root is the answer
            return root

# The built-in bin() converts an integer to its binary string
>>>bin(10)
'0b1010'
>>> bin(20)
'0b10100'
# Sort with a two-part key: first by the number of 1-bits, then by the value itself
sorted(arr, key=lambda x: (bin(x).count('1'),x))
# Extend a list with another list
arr.extend(sorted(dictionary[i]))

class Solution:
def increasingBST(self, root):
def inorder(node):
if node:
                yield from inorder(node.left)  # delegate to the sub-generator
yield node.val
yield from inorder(node.right)
        ans = cur = TreeNode(None)  # dummy head node
for v in inorder(root):
cur.right = TreeNode(v)
            cur = cur.right  # advance the pointer
        return ans.right

>>> from itertools import product
>>> A = [1, 2, 3]
>>> B = [100, 200, 300]
>>> for item in product(A, B):
... print(item)
...
(1, 100)
(1, 200)
(1, 300)
(2, 100)
(2, 200)
(2, 300)
(3, 100)
(3, 200)
(3, 300)

# Recursive solution (N-ary tree postorder traversal)
# The recursion is wrapped in a nested helper: calling postorder itself recursively
# would re-create `result` on every call.
def postorder(self, root: 'Node') -> List[int]:
result = []
def post(root):
            if not root:  # empty node, nothing to do
                return
            else:  # node exists
                if root.children:  # visit the children first
                    for i in root.children:
                        post(i)
                result.append(root.val)  # then record the current node
post(root)
return result
    # Iterative solution: use a stack to simulate the traversal
def postorder(self, root: 'Node') -> List[int]:
        if not root:
            return []
stack_run = [root]
result = []
while stack_run:
node = stack_run.pop()
result.append(node.val)
children = node.children
for child in children:
if child:
stack_run.append(child)
result.reverse()
return result
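A small usage sketch of the two postorder methods above; it assumes they sit in a LeetCode-style Solution class with typing.List imported, and uses a hypothetical N-ary Node with val/children fields:
class Node:
    def __init__(self, val=None, children=None):
        self.val = val
        self.children = children if children is not None else []

root = Node(1, [Node(3, [Node(5), Node(6)]), Node(2), Node(4)])
print(Solution().postorder(root))  # expected: [5, 6, 3, 2, 4, 1]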

class Solution:
def longestWord(self, words: List[str]) -> str:
        res = ''       # answer
        trie = Trie()  # build the trie (prefix tree)
        for word in words:  # insert every word
            trie.insert(word)
        # print(trie.root.children['a'].children)  # debug print, left disabled
for word in words:
if trie.search(word):
                if len(word) > len(res):  # found in the trie and longer than the current answer
                    res=word
                elif len(word)==len(res) and word < res:  # same length but lexicographically smaller
res=word
return res
class TrieNode:
def __init__(self):
        self.end=False  # marks whether a word ends at this node
        self.children=collections.defaultdict(TrieNode)  # child map; missing keys default to a new TrieNode
class Trie:
def __init__(self):
self.root=TrieNode()
def insert(self, word: str) -> None:
node=self.root
for s in word:
            node=node.children[s]  # defaultdict creates the child node automatically if it is missing
node.end=True
def search(self, word: str) -> bool:
node=self.root
for s in word:
            node=node.children.get(s)  # look up the child for s; returns None if absent
            if node is None or not node.end:  # every prefix must itself be a complete word
                return False
        return True

class UnionFindSet(object):
def __init__(self, nodes):
        '''
        Initialize the union-find structure
        '''
        # parent of each node
        self.fatherMap = {}
        # size of each set, keyed by its root
        self.setNumMap = {}
        # initially every node is its own set
for node in nodes:
self.fatherMap[node] = node
self.setNumMap[node] = 1
def findFather(self, node):
        '''
        Recursively find and return the root (representative) of node's set
        '''
father = self.fatherMap[node]
if (node != father):
father = self.findFather(father)
            # path compression
self.fatherMap[node] = father
return father
def isSameSet(self, a, b):
        '''
        Check whether nodes a and b belong to the same set
        '''
return self.findFather(a) == self.findFather(b)
def union(self, a, b):
        '''
        Merge the set containing a into the set containing b
        '''
if a is None or b is None:
return
aFather=self.findFather(a)
bFather = self.findFather(b)
if (aFather != bFather):
            # sizes of the two sets
aNum=self.setNumMap[aFather]
bNum=self.setNumMap[bFather]
            # attach a's root under b's root
self.fatherMap[aFather]=bFather
self.setNumMap[bFather]=aNum + bNum
            # drop the size record of the absorbed root
self.setNumMap.pop(aFather)
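A minimal usage sketch of the union-find class above (the node names are illustrative):
ufs = UnionFindSet(['a', 'b', 'c', 'd'])
ufs.union('a', 'b')
ufs.union('c', 'd')
print(ufs.isSameSet('a', 'b'))  # True
print(ufs.isSameSet('a', 'c'))  # False
ufs.union('b', 'c')
print(ufs.isSameSet('a', 'd'))  # True, all four nodes are now in one set of size 4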

class TrieNode:
def __init__(self):
        self.end=False  # marks whether a word ends at this node
        self.children=collections.defaultdict(TrieNode)  # child map; missing keys default to a new TrieNode
class Trie:
def __init__(self):
"""
Initialize your data structure here.
"""
self.root = TrieNode()
def insert(self, word: str) -> None:
"""
Inserts a word into the trie.
"""
node = self.root
for s in word:
            node = node.children[s]  # defaultdict creates the child node automatically if it is missing
node.end = True
# print("insert " + word)
def search(self, word: str) -> bool:
"""
Returns if the word is in the trie.
"""
node = self.root
# print("search " + word)
for s in word:
            node = node.children.get(s)  # look up the child for s; returns None if absent
            if node is None:  # prefix match failed
                return False
        return node.end  # True only if a word ends exactly here
def startsWith(self, prefix: str) -> bool:
"""
Returns if there is any word in the trie that starts with the given prefix.
"""
node = self.root
for s in prefix:
            node = node.children.get(s)  # look up the child for s; returns None if absent
            if node is None:  # prefix match failed
                return False
        return True

class Solution:
def findMaximumXOR(self, nums: List[int]) -> int:
        L = len(bin(max(nums))) - 2  # number of bits of the largest value
max_xor = 0
        for i in range(L)[::-1]:  # iterate from the most significant bit down
# go to the next bit by the left shift
max_xor <<= 1
            # assume the last bit of curr_xor is 1
curr_xor = max_xor | 1
# compute all existing prefixes
# of length (L - i) in binary representation
            prefixes = {num >> i for num in nums}  # all prefixes of the current length
# Update max_xor, if two of these prefixes could result in curr_xor.
# Check if p1^p2 == curr_xor, i.e. p1 == curr_xor^p2
# print(max_xor,curr_xor, prefixes)
max_xor |= any(curr_xor^p in prefixes for p in prefixes)
        return max_xor
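A quick check of the prefix-set approach above (assumes the surrounding Solution class and the typing.List import are in place):
print(Solution().findMaximumXOR([3, 10, 5, 25, 2, 8]))  # 28, from 5 ^ 25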
from collections import defaultdict
dict1 = defaultdict(int)
dict2 = defaultdict(set)
dict3 = defaultdict(str)
dict4 = defaultdict(list)
# A missing key returns the empty default of int, set, str or list respectively (0, set(), '', [])

class MyHashSet:
def __init__(self):
"""
Initialize your data structure here.
"""
        self.keyRange = 769  # a prime number of buckets
self.bucketArray = [Bucket() for i in range(self.keyRange)]
def _hash(self, key):
return key % self.keyRange
def add(self, key: int) -> None:
bucketIndex = self._hash(key)
self.bucketArray[bucketIndex].insert(key)
def remove(self, key: int) -> None:
bucketIndex = self._hash(key)
self.bucketArray[bucketIndex].delete(key)
def contains(self, key: int) -> bool:
"""
Returns true if this set contains the specified element
"""
bucketIndex = self._hash(key)
return self.bucketArray[bucketIndex].exists(key)
class Node:
def __init__(self, value, nextNode=None):
self.value = value
self.next = nextNode
class Bucket:  # new elements are inserted at the head of the list
    '''
    One bucket of the hash set: a singly linked list with a dummy head
    '''
def __init__(self):
self.head = Node(-1)
def exists(self, val):
cur = self.head.next
while cur!=None:
if cur.value==val:
return True
cur = cur.next
return False
def insert(self, newVal):
if not self.exists(newVal):
newNode = Node(newVal, self.head.next)
self.head.next = newNode
def delete(self, val):
pre = self.head
cur = self.head.next
while cur is not None:
if cur.value == val:
pre.next = cur.next
return
pre = cur
            cur = cur.next

# Three stacks in one array: fields from the __init__ of a triple-stack class (stackSize is assumed to be given)
self.tripleStack = [None]*stackSize*3
self.top = [0, stackSize, stackSize*2]            # top pointers; each is also the index of the next free slot
self.roof = [stackSize, stackSize*2, stackSize*3] # exclusive upper bound for each top pointer
self.bottom = [0, stackSize, stackSize*2]         # lower bound for each top pointer
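A hedged sketch of push/pop methods built on the fields above (the method names and the -1 "empty" return value are my own assumptions, not from the original notes):
    def push(self, stackNum, value):
        if self.top[stackNum] < self.roof[stackNum]:    # stack not full
            self.tripleStack[self.top[stackNum]] = value
            self.top[stackNum] += 1

    def pop(self, stackNum):
        if self.top[stackNum] > self.bottom[stackNum]:  # stack not empty
            self.top[stackNum] -= 1
            return self.tripleStack[self.top[stackNum]]
        return -1                                       # stack is empty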

class NumArray:
def __init__(self, nums: List[int]):
        '''Initialization, O(n) total'''
        self._nums = [0] + nums  # Fenwick tree (binary indexed tree), 1-indexed
        n = len(nums)
        # build the tree
        for i in range(1, n + 1):
            j = i + self.lowbit(i)  # index of the parent node
            if j < n + 1:
                self._nums[j] += self._nums[i]  # a parent accumulates the sums of its children
def lowbit(self, x: int) -> int:
        '''Lowest set bit: e.g. lowbit(0b0010) returns 0b10'''
return x & (-x)
def update(self, idx: int, val: int):
        '''Set the original array's element at idx to val, O(log n) total'''
        prev = self.sumRange(idx, idx)  # current value at idx
        idx += 1  # tree indices start at 1
        val -= prev  # val becomes the delta to add (may be negative)
        while idx < len(self._nums):  # update this node and all of its ancestors
self._nums[idx] += val
idx += self.lowbit(idx)
def _query(self, idx: int) -> int:
        '''Prefix sum of the original array over [0, idx)'''
res = 0
while idx > 0:
res += self._nums[idx]
            idx -= self.lowbit(idx)  # jump to the preceding range
return res
def sumRange(self, i: int, j: int) -> int:
        '''Sum of the original array over the closed range [i, j]'''
return self._query(j+1) - self._query(i)
# Your NumArray object will be instantiated and called as such:
# obj = NumArray(nums)
# obj.update(i,val)
# param_2 = obj.sumRange(i,j)
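A small usage sketch of the Fenwick-tree NumArray above (assumes the typing.List import LeetCode provides):
arr = NumArray([1, 3, 5])
print(arr.sumRange(0, 2))  # 9
arr.update(1, 2)           # the underlying array becomes [1, 2, 5]
print(arr.sumRange(0, 2))  # 8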

def maxNum(nums,start):
    '''
    Return the maximum value in nums and its index in the original array;
    start is the offset of nums within the original array
    '''
l = len(nums)
max_num = nums[0]
idx = start
for i in range(l):
if nums[i] >= max_num:
max_num = nums[i]
idx = start+i
    return max_num,idx

# Move the top n-1 discs to the buffer first, then move the largest disc to the target
def hanota(n, A, B, C):
    '''
    Move n discs from A to C, using B as a buffer
    '''
    # with a single disc, move it directly
    if n == 1:
        C.append(A.pop())
        return
    # move the top n-1 discs from A to B, using C as a buffer
    hanota(n-1, A, C, B)
    # move the remaining (largest) disc from A to C
    C.append(A.pop())
    # move the n-1 discs from B to C, using A as a buffer
    hanota(n-1, B, A, C)
hanota(len(A), A, B, C)  # A, B, C are the three rods as lists; A initially holds all the discs
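A quick check of the recursion above:
A, B, C = [3, 2, 1], [], []
hanota(len(A), A, B, C)
print(A, B, C)  # [] [] [3, 2, 1]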

class Solution:
def transpose(self, A: List[List[int]]) -> List[List[int]]:
        # *A unpacks the rows of A as separate arguments
        # zip(A[0], A[1], ...) groups the i-th element of every row into a tuple, i.e. the i-th column
        return [list(i) for i in zip(*A)]
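For example (assuming the typing imports LeetCode provides):
print(Solution().transpose([[1, 2, 3], [4, 5, 6]]))  # [[1, 4], [2, 5], [3, 6]]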
# Higher-order functions
from functools import lru_cache
class Solution:
def stoneGame(self, piles):
N = len(piles)
        # lru_cache = least-recently-used cache; maxsize=None means no limit
@lru_cache(None)
def dp(i, j):
# The value of the game [piles[i], piles[i+1], ..., piles[j]].
if i > j: return 0
            parity = (j - i) % 2  # tells whether it is Alex (the first player) to move
if parity == 1: # first player
                return max(piles[i] + dp(i+1,j), piles[j] + dp(i,j-1))  # maximize own score
else:
                return min(-piles[i] + dp(i+1,j), -piles[j] + dp(i,j-1))  # minimize the opponent's score
        return dp(0, N - 1) > 0
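A quick check of the interval DP above:
print(Solution().stoneGame([5, 3, 4, 5]))  # True, the first player can always win here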
# Method 1
from functools import lru_cache
class Solution:
def getMoneyAmount(self, n: int) -> int:
# dp = [0,0,1,3,4,6,8,10]
@lru_cache(None)
def dp(i,j):
            '''
            dp(i, j): the minimum amount of money that guarantees a win on the range i..j
            '''
if i>=j:
return 0
else:
num = 0xffffffff
                for x in range(i,j+1):  # suppose we guess x
                    temp = max(dp(i,x-1),dp(x+1,j))+x  # worst case: pay x plus the costlier side (to guarantee a win)
                    if num>temp:  # keep the cheapest guaranteed-win guess
num = temp
return num
        return dp(1,n)

# Probability that the n-th person gets their own seat (recursive method body)
        if n == 1:
return 1.0
else:
            return 1.0/n + (n-2)/n*self.nthPersonGetsNthSeat(n-1)

# Day of the week: calendar-based approach (commented out) vs. datetime approach
'''
import calendar
# calendar.monthrange(year, month) returns (weekday of the 1st of the month, number of days in the month)
weekend = (day+calendar.monthrange(year,month)[0])%7
result = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"]
return result[weekend]
'''
import datetime
result = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday","Sunday"]
# Build a date object with datetime.date, then weekday() returns the day of the week (Monday = 0)
weekend = datetime.date(year,month,day).weekday()
return result[weekend]

# Mark the presence of value v by negating nums[v-1] (N is assumed to be len(nums))
for i in range(N):
temp = abs(nums[i])-1
if nums[temp]>0:
        nums[temp] = -nums[temp]

import random
print( random.randint(1,10) )        # random integer between 1 and 10 (inclusive)
print( random.random() )             # random float in [0, 1)
print( random.uniform(1.1,5.4) )     # random float between 1.1 and 5.4; the bounds need not be integers
print( random.choice('tomorrow') )   # pick one element of a sequence at random
print( random.randrange(1,100,2) )   # random odd integer from 1 to 99 (start 1, step 2)
a=[1,3,5,6,7]                        # random.shuffle shuffles the elements of a in place
random.shuffle(a)
print(a)

def getSum(self, a, b):
# 2^32
MASK = 0x100000000
        # maximum 32-bit signed integer
        MAX_INT = 0x7FFFFFFF  # the top bit is the sign bit, so the maximum is 2^31 - 1
        MIN_INT = MAX_INT + 1  # two's complement = one's complement + 1; the minimum is -2^31
        # in two's complement, positive and negative numbers add directly; the sign bit tells the sign
while b != 0:
            # carry bits
carry = (a & b) << 1
            # keep the result within [0, 2^32 - 1]
a = (a ^ b) % MASK
b = carry % MASK
        return a if a <= MAX_INT else ~(a^0xFFFFFFFF)

from itertools import combinations, permutations
h_m = ['8','4','2','1',32,16,8,4,2,1]
list(combinations(h_m,num))  # combinations: selections where order does not matter (num is the selection size)
list(permutations(h_m,num))  # permutations: all orderings of each selection, so order matters
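For example, on a small list:
list(combinations([1, 2, 3], 2))  # [(1, 2), (1, 3), (2, 3)]
list(permutations([1, 2, 3], 2))  # [(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)]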

# LeetCode 剑指 Offer 65: add two integers without + or -
class Solution:
def add(self, a: int, b: int) -> int:
        # Python integers have no fixed bit width
x = 0xffffffff # 4294967295
        a, b = a & x, b & x  # truncate the unbounded integers to 32 bits
while b != 0:
a, b = (a ^ b), (a & b) << 1 & x
        # ~(a ^ x) flips the bits above bit 32 and leaves bits 1-32 unchanged.
        # In two's complement 11111...1110 is -2
        # and 11111...1111 is -1.
        # Two's complement lets positive and negative numbers be added directly.
        return a if a <= 0x7fffffff else ~(a ^ x)

"""
(1) heapq stores the heap directly in a plain Python list; all operations work on that list.
(2) It operates on the list in place and allocates no extra storage.
(3) heap[0] is always the smallest element.
(4) The list holds the heap in level order: the children of index k sit at indices 2k+1 and 2k+2.
"""
import heapq
# (1) Create an empty heap and push data onto it
heap = []
for item in [2, 3, 1, 4]:
heapq.heappush(heap, item)
print(heap)  # prints [1, 3, 2, 4]
# (2) Build a heap out of an existing list --> heapify
l = [2, 3, 1, 4]
heapq.heapify(l)
print(l)  # prints [1, 3, 2, 4]
# (3) Push an element onto the heap --> heappush
heapq.heappush(l, -10)
print(l)  # prints [-10, 1, 2, 4, 3]
# (4) Pop the head of the heap (the heap is re-sifted afterwards, so heap[0] stays the minimum) --> heappop
print(heapq.heappop(l))  # prints -10
print(l)  # prints [1, 3, 2, 4]
print(heapq.heappop(l))  # prints 1
print(l)  # prints [2, 3, 4]
# (5) Pop the current head, push a new value and re-sift --> heapreplace
l = [2, 3, 1, 4]
print(heapq.heapreplace(l, 100))  # prints 2
print(l)  # prints [1, 3, 100, 4]
# (6) Merge several lists --> merge (the inputs are expected to be sorted already)
l = [1, 3, 2]
l2 = [5, 2, 3]
l3 = [9, 2, 3, 1]
print(list(heapq.merge(l, l2, l3)))  # prints [1, 3, 2, 5, 2, 3, 9, 2, 3, 1] because the inputs are not sorted
# (7) Multiway merge --> merge
# sort each list first, then merge the sorted lists
print(list(heapq.merge(sorted(l), sorted(l2), sorted(l3))))  # prints [1, 1, 2, 2, 2, 3, 3, 3, 5, 9]
# (8) Largest elements --> nlargest
l = [2, 3, 1, 4]
print(heapq.nlargest(2, l))  # prints [4, 3]
# (9) Smallest elements --> nsmallest
l = [2, 3, 1, 4]
print(heapq.nsmallest(2, l))  # prints [1, 2]
# (10) Push a value, then pop the head (the heap is left untouched when the pushed value is already the smallest) --> heappushpop
l = [2, 3, 1, 4]
print(heapq.heappushpop(l, -10))  # prints -10
print(l)  # prints [2, 3, 1, 4]
# Merge sort: stable; split, then merge; time O(n log n), space O(n)
def merge_sorted(arr):
"""归并排序:二分数组+合并两个有序数组
比较性:排序时元素之间需要比较,所以为比较排序
稳定性:当左边的元素小于等于右边的元素就把左边的排前面,而原本左边的就是在前面,所以相同元素的相对顺序不变,故为稳定排序
时间复杂度:O(nlog^n),排序算法下界
空间复杂度:O(n),在合并子列时需要申请临时空间,而且空间大小随数列的大小而变化
记忆方法:所谓归并肯定是要先分解,再合并
Args:
arr (List): 要进行归并排序的数组
Returns:
List: 排序后的arr数组
"""
if len(arr) == 1:
return arr
mid = len(arr) // 2
left, right = arr[:mid], arr[mid:]
return merge(merge_sorted(left), merge_sorted(right))
def merge(left, right):
"""合并阶段
Args:
left (List): 已经递归二分的有序的左数组,极端情况下只有一个
right (List): 已经递归二分的有序的右数组,极端情况下只有一个
Returns:
List: 合并left和right两个数组的新数组
"""
res = []
while len(left) > 0 and len(right) > 0:
        if left[0] <= right[0]:  # <= keeps the sort stable, matching the docstring
res.append(left.pop(0))
else:
res.append(right.pop(0))
res += left
res += right
return res
print(merge_sorted([1, 7, 9, 10, 3, 4, 5, 3, 4, 2, 3, 8]))
def merge_sort(l, r):
"""快速版本的归并排序(借鉴)
Args:
l (int): 最左区间
r (int): 最右区间(闭而不是开)
Returns:
None: 排完在nums上
"""
    # base case
    if l >= r:
        return
    # recursive split
m = (l + r) // 2
merge_sort(l, m)
merge_sort(m + 1, r)
    # merge step
    i, j = l, m + 1
    # copy the slice nums[l..r] into the scratch array
tmp[l : r + 1] = nums[l : r + 1]
for k in range(l, r + 1):
        # left half exhausted
if i == m + 1:
nums[k] = tmp[j]
j += 1
        # right half exhausted, or the left element is less than or equal to the right one
elif j == r + 1 or tmp[i] <= tmp[j]:
nums[k] = tmp[i]
i += 1
        # left element is greater than the right one
else:
nums[k] = tmp[j]
j += 1
            # res += m - i + 1  # would count inversions here
nums = [1, 7, 9, 10, 3, 4, 5, 3, 4, 2, 3, 8]
tmp = [0] * len(nums)
merge_sort(0, len(nums) - 1)
print(nums)
import this
"""
The Zen of Python, by Tim Peters
Beautiful is better than ugly.  (formatting)
Explicit is better than implicit.  (when calling things)
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.  (code structure)
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!
"""
# Generators with yield produce values lazily and avoid holding the whole sequence in memory
def fibonacci():
num0 = 0
num1 = 1
for i in range(10):
num2 = num0 + num1
yield num2
num0 = num1
num1 = num2
for i in fibonacci():
print(i)
# for-else: the else branch runs only when the loop finishes without a break
for cc in ['UK','ID','JP','US']:
if cc == 'CN':
break
else:
print('no CN')
# try-else: the else branch runs only when no exception was raised
"""
try:
db.execute("UPDATE table SET xx = WHERE yy = yy")
except DBError:
db.rollback()
else:
db.commit()
"""
# The with statement manages resources automatically
with open('pythonic.py') as fp:
for line in fp:
print(line[:-1])
"""
1.调用open,返回对象obj
2.调用obj.__enter__(),返回并赋值给fp
3.执行with的代码块
4.执行obj.__exit__()
5.如果发生异常,传给obj.__exit__(),返回False异常继续抛出,否则挂起继续运行
"""
# List comprehensions vs. generator expressions
squares = [ x * x for x in range(10)]
print(squares)
squares = ( x * x for x in range(10))
for i in squares:
print(i)
# Iterate over a dict with items()
m = {'one':1, 'two':2,'three':3}
for k,v in m.items():
print(k,v)
