PyTorch Notes

Preface

Compared with TensorFlow, PyTorch is better suited for research, mainly because PyTorch uses dynamic computation graphs: you don't have to build the whole graph first and then run it inside a session, as TensorFlow does, and that static style makes debugging very inconvenient!

As for what its big advantages really are, there are good answers on Zhihu that cover this.

So this post mainly records introductory PyTorch code, following 莫烦 (Morvan)'s tutorials!
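To make the define-by-run point concrete, here is a minimal sketch of my own (not from the tutorial; it uses the modern requires_grad API rather than the Variable wrapper the code below uses). Because the graph is built as the code executes, ordinary Python control flow and print statements work directly on intermediate tensors, which is exactly what makes debugging easy:

import torch

x = torch.ones(3, requires_grad=True)
y = x * 2
while y.norm() < 10:    # ordinary Python control flow decides the graph structure
    y = y * 2
print(y)                # inspect intermediate values directly, no session needed
y.sum().backward()
print(x.grad)           # gradients flow through whatever graph was actually executed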

Regression (curve fitting)

#!/usr/bin/env python
# encoding: utf-8
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt

# toy data: 100 points of y = x^2 plus noise
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # shape (100, 1)
y = x * x + 0.2 * torch.rand(x.size())
x, y = Variable(x), Variable(y)  # Variable is a no-op since PyTorch 0.4; kept to match the original tutorial


class Net(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(input_size, hidden_size)
        self.prediction = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.hidden(x))
        x = self.prediction(x)
        return x


net = Net(1, 10, 1)
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
loss_fn = torch.nn.MSELoss()

plt.ion()  # interactive mode so the plot updates during training
plt.show()

for step in range(1000):
    prediction = net(x)
    optimizer.zero_grad()
    loss = loss_fn(prediction, y)
    loss.backward()
    optimizer.step()
    if step % 10 == 0:
        plt.cla()
        plt.scatter(x.data.numpy(), y.data.numpy())
        plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
        plt.text(0.5, 1.25, 'Step:{},Loss:{:.3f}'.format(step, loss.item()),  # loss.data[0] in PyTorch < 0.4
                 fontdict={'size': 12, 'color': 'red'})
        plt.pause(0.1)

plt.ioff()

Classification

#!/usr/bin/env python
# encoding: utf-8
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np

# toy data: two Gaussian clusters, class 0 around (2, 2) and class 1 around (-2, -2)
n_data = torch.ones(100, 2)
x0 = torch.normal(2 * n_data, 1)   # 100 samples centered at (2, 2)
y0 = torch.zeros(100)

x1 = torch.normal(-2 * n_data, 1)  # 100 samples centered at (-2, -2)
y1 = torch.ones(100)

x = torch.cat((x0, x1), dim=0).type(torch.FloatTensor)  # inputs must be float
y = torch.cat((y0, y1), dim=0).type(torch.LongTensor)   # CrossEntropyLoss expects long labels

x, y = Variable(x), Variable(y)  # Variable is a no-op since PyTorch 0.4


class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.out = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        x = F.relu(self.hidden(x))
        x = self.out(x)  # raw logits; no softmax here, see the note below
        return x


net = Net(2, 10, 2)
optimizer = torch.optim.SGD(net.parameters(), lr=0.2)
loss_func = torch.nn.CrossEntropyLoss()

for i in range(100):
    out = net(x)
    loss = loss_func(out, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    prediction = torch.max(out, 1)[1]  # index of the larger logit = predicted class
    pred_y = prediction.data.numpy().squeeze()
    target_y = y.data.numpy()
    accuracy = np.sum(pred_y == target_y) / 200
    print('Step:{}\tAccuracy:{:.2f}'.format(i, accuracy))

In principle a softmax should be applied to the output here, but in practice it is unnecessary: torch.nn.CrossEntropyLoss already combines LogSoftmax and NLLLoss internally, so the network should output raw logits, and since softmax is monotonic, taking the argmax of the logits gives the same predicted class anyway.
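A quick sketch of that point (my addition, not from the tutorial): applying F.softmax turns logits into probabilities but never changes which class wins.

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, -1.0], [0.5, 3.0]])  # raw network outputs for 2 samples
probs = F.softmax(logits, dim=1)                  # rows now sum to 1
print(probs)
print(torch.max(logits, 1)[1])  # tensor([0, 1])
print(torch.max(probs, 1)[1])   # same classes: softmax preserves the ordering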

A Simpler Way to Build Models

net2 = torch.nn.Sequential(
    torch.nn.Linear(1, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1)
)
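This net2 has the same structure as the Net(1, 10, 1) defined in the regression example above; the only visible difference is that the ReLU shows up as a layer when you print the model, whereas the F.relu call inside forward does not.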

Saving and Reloading a Model

torch.save(net1, 'net.pkl')                      # save the entire network
torch.save(net1.state_dict(), 'net_params.pkl')  # save only the parameters (faster, smaller)

net2 = torch.load('net.pkl')
net3.load_state_dict(torch.load('net_params.pkl'))  # net3 must be defined beforehand
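As a minimal sketch (assuming the Net class from the regression example above), "defined beforehand" just means constructing a model with the same architecture before loading the saved parameters into it:

net3 = Net(1, 10, 1)  # same architecture as net1
net3.load_state_dict(torch.load('net_params.pkl'))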

Data Management Tools

import torch.utils.data as Data

# wrap the tensors in a Dataset that torch can consume
torch_dataset = Data.TensorDataset(x, y)  # PyTorch < 0.4 used data_tensor=x, target_tensor=y

# put the dataset into a DataLoader
loader = Data.DataLoader(
    dataset=torch_dataset,  # torch TensorDataset format
    batch_size=BATCH_SIZE,  # mini-batch size
    shuffle=True,           # shuffle the data each epoch (usually a good idea)
    num_workers=2,          # load the data with multiple worker processes
)

RNN

import torch.nn as nn


class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()

        self.rnn = nn.LSTM(    # an LSTM works much better here than plain nn.RNN()
            input_size=28,     # number of pixels per image row
            hidden_size=64,    # rnn hidden units
            num_layers=1,      # number of stacked RNN layers
            batch_first=True,  # input & output have batch size as the first dimension, e.g. (batch, time_step, input_size)
        )

        self.out = nn.Linear(64, 10)  # output layer

    def forward(self, x):
        # x shape (batch, time_step, input_size)
        # r_out shape (batch, time_step, hidden_size)
        # h_n shape (n_layers, batch, hidden_size): LSTM carries two states, h_n is the hidden state, h_c the cell state
        # h_c shape (n_layers, batch, hidden_size)
        r_out, (h_n, h_c) = self.rnn(x, None)  # None means the initial hidden state is all zeros

        # take the output at the last time step
        # here r_out[:, -1, :] equals h_n of the last layer
        out = self.out(r_out[:, -1, :])
        return out


rnn = RNN()
print(rnn)
"""
RNN(
  (rnn): LSTM(28, 64, batch_first=True)
  (out): Linear(64 -> 10)
)
"""