TL;DR

Way2AI series: making sure we have laid a solid foundation before setting out to "change the world".

This article briefly covers the essential knowledge of PyTorch, a machine learning framework.

Set up

First, we import the NumPy and PyTorch libraries and set random seeds for reproducibility.
Note that PyTorch requires its own seed, since we will be generating random tensors.

import numpy as np
import torch

SEED = 1024

# Set seed for reproducibility
np.random.seed(seed=SEED)
torch.manual_seed(SEED)

Basics

Below are some PyTorch basics, such as how to create tensors and how to convert common data structures (lists, arrays, etc.) into tensors.

# Creating a random tensor
x = torch.randn(2, 3) # normal distribution (rand(2,3) -> uniform distribution)
print(f"Type: {x.type()}")
print(f"Size: {x.shape}")
print(f"Values: \n{x}")

# Output
# Type: torch.FloatTensor
# Size: torch.Size([2, 3])
# Values:
# tensor([[-1.4837, 0.2671, -1.8337],
# [-0.1047, 0.6002, -0.5496]])


# Zero and Ones tensor
x = torch.zeros(2, 3)
print (x)
x = torch.ones(2, 3)
print (x)

# Output
# tensor([[0., 0., 0.],
# [0., 0., 0.]])
# tensor([[1., 1., 1.],
# [1., 1., 1.]])


# List → Tensor
x = torch.Tensor([[1, 2, 3],[4, 5, 6]])
print(f"Size: {x.shape}")
print(f"Values: \n{x}")

# Output
# Size: torch.Size([2, 3])
# Values:
# tensor([[1., 2., 3.],
# [4., 5., 6.]])


# NumPy array → Tensor
x = torch.Tensor(np.random.rand(2, 3))
print(f"Size: {x.shape}")
print(f"Values: \n{x}")

# Output
# Size: torch.Size([2, 3])
# Values:
# tensor([[0.4445, 0.3168, 0.9231],
# [0.4659, 0.7984, 0.1992]])


# Changing tensor type
x = torch.Tensor(3, 4)
print(f"Type: {x.type()}")
x = x.long()
print(f"Type: {x.type()}")

# Output
# Type: torch.FloatTensor
# Type: torch.LongTensor
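
One detail worth noting here (an added illustration, not part of the original examples): the lowercase torch.tensor constructor infers its dtype from the input data, whereas torch.Tensor produces a FloatTensor by default, so the two are not interchangeable.

# torch.tensor infers the dtype from the data; it can also be set explicitly
x = torch.tensor([[1, 2, 3], [4, 5, 6]])                       # integers → torch.LongTensor
print(f"Type: {x.type()}")
x = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32)  # torch.FloatTensor
print(f"Type: {x.type()}")

# Output
# Type: torch.LongTensor
# Type: torch.FloatTensor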

Operations

Let's explore some basic tensor operations.

# Addition
x = torch.randn(2, 3)
y = torch.randn(2, 3)
z = x + y
print(f"Size: {z.shape}")
print(f"Values: \n{z}")

# Output
# Size: torch.Size([2, 3])
# Values:
# tensor([[-0.4446, 0.4933, -1.4847],
# [ 0.8493, 0.6911, -0.3357]])


# Matrix multiplication
x = torch.randn(2, 3)
y = torch.randn(3, 2)
z = torch.mm(x, y)
print(f"Size: {z.shape}")
print(f"Values: \n{z}")

# Output
# Size: torch.Size([2, 2])
# Values:
# tensor([[ 0.2733, -4.0392],
# [ 1.6385, -4.7220]])


# Transpose
x = torch.randn(2, 3)
print(f"Size: {x.shape}")
print(f"Values: \n{x}")
y = torch.t(x)
print(f"Size: {y.shape}")
print(f"Values: \n{y}")

# Output
# Size: torch.Size([2, 3])
# Values:
# tensor([[ 0.5920, -0.6301, -0.8856],
# [ 1.2261, -0.4671, -1.0279]])
# Size: torch.Size([3, 2])
# Values:
# tensor([[ 0.5920, 1.2261],
# [-0.6301, -0.4671],
# [-0.8856, -1.0279]])


# Reshape
x = torch.randn(2, 3)
z = x.view(3, 2)
print(f"Size: {z.shape}")
print(f"Values: \n{z}")

# Output
# Size: torch.Size([3, 2])
# Values:
# tensor([[-1.0387, 0.1039],
# [ 0.5989, -1.4801],
# [-0.8618, -0.9181]])


# Dangers of reshaping (unintended consequences)
x = torch.tensor([
[[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]],
[[10, 10, 10, 10], [20, 20, 20, 20], [30, 30, 30, 30]]
])
print(f"Size: {x.shape}")
print(f"x: \n{x}\n")

a = x.view(x.size(1), -1)
print(f"\nSize: {a.shape}")
print(f"a: \n{a}\n")

b = x.transpose(0, 1).contiguous()
print(f"\nSize: {b.shape}")
print(f"b: \n{b}\n")

c = b.view(b.size(0), -1)
print(f"\nSize: {c.shape}")
print(f"c: \n{c}")

# Output
# Size: torch.Size([2, 3, 4])
# x:
# tensor([[[ 1, 1, 1, 1],
# [ 2, 2, 2, 2],
# [ 3, 3, 3, 3]],
#
# [[10, 10, 10, 10],
# [20, 20, 20, 20],
# [30, 30, 30, 30]]])
#
#
# Size: torch.Size([3, 8])
# a:
# tensor([[ 1, 1, 1, 1, 2, 2, 2, 2],
# [ 3, 3, 3, 3, 10, 10, 10, 10],
# [20, 20, 20, 20, 30, 30, 30, 30]])
#
#
# Size: torch.Size([3, 2, 4])
# b:
# tensor([[[ 1, 1, 1, 1],
# [10, 10, 10, 10]],
#
# [[ 2, 2, 2, 2],
# [20, 20, 20, 20]],
#
# [[ 3, 3, 3, 3],
# [30, 30, 30, 30]]])
#
#
# Size: torch.Size([3, 8])
# c:
# tensor([[ 1, 1, 1, 1, 10, 10, 10, 10],
# [ 2, 2, 2, 2, 20, 20, 20, 20],
# [ 3, 3, 3, 3, 30, 30, 30, 30]])


# Dimensional operations
x = torch.randn(2, 3)
print(f"Values: \n{x}")
y = torch.sum(x, dim=0) # add each row's value for every column
print(f"Values: \n{y}")
z = torch.sum(x, dim=1) # add each column's value for every row
print(f"Values: \n{z}")

# Output
# Values:
# tensor([[-0.0355, 0.4145, 0.6798],
# [-0.2936, 0.1872, -0.2724]])
# Values:
# tensor([-0.3292, 0.6017, 0.4074])
# Values:
# tensor([ 1.0588, -0.3788])
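
The same dim argument applies to other reductions as well. The short sketch below (added for illustration) shows keepdim, which preserves the reduced dimension, and torch.max, which returns both the values and their indices along a dimension.

# keepdim preserves the reduced dimension
x = torch.randn(2, 3)
y = torch.sum(x, dim=1, keepdim=True)
print(y.shape)  # torch.Size([2, 1]) instead of torch.Size([2])

# torch.max along a dimension returns both the values and their indices
values, indices = torch.max(x, dim=1)
print(values.shape, indices.shape)  # torch.Size([2]) torch.Size([2])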

Indexing

We can use indexing to extract specific values from a tensor.

x = torch.randn(3, 4)
print (f"x: \n{x}")
print (f"x[:1]: \n{x[:1]}")
print (f"x[:1, 1:3]: \n{x[:1, 1:3]}")

# Output
# x:
# tensor([[-0.5524, -0.8358, -2.8240, 0.2564],
# [ 0.5045, -1.1290, 0.7631, 1.0155],
# [-1.2475, -0.0335, 0.5442, 0.4280]])
# x[:1]:
# tensor([[-0.5524, -0.8358, -2.8240, 0.2564]])
# x[:1, 1:3]:
# tensor([[-0.8358, -2.8240]])

Slicing

# Select with dimensional indices
x = torch.randn(2, 3)
print(f"Values: \n{x}")

col_indices = torch.LongTensor([0, 2])
chosen = torch.index_select(x, dim=1, index=col_indices) # values from column 0 & 2
print(f"Values: \n{chosen}")

row_indices = torch.LongTensor([0, 1])
col_indices = torch.LongTensor([0, 2])
chosen = x[row_indices, col_indices] # values from (0, 0) & (1, 2)
print(f"Values: \n{chosen}")

# Output
# Values:
# tensor([[-1.6357, 0.7964, 0.9450],
# [-1.6535, 1.8129, 0.9162]])
# Values:
# tensor([[-1.6357, 0.9450],
# [-1.6535, 0.9162]])
# Values:
# tensor([-1.6357, 0.9162])
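
Boolean masking is another common way to pull out values; the small sketch below is an added illustration and was not part of the original set of examples.

# Boolean masking: select entries that satisfy a condition
x = torch.randn(2, 3)
mask = x > 0                          # bool tensor with the same shape as x
print(x[mask])                        # 1-D tensor holding the positive entries
print(torch.masked_select(x, mask))   # equivalent call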

Joining

We can also combine tensors using concatenation and stacking.

x = torch.randn(2, 3)
print (x)
print (x.shape)

# Output
# tensor([[-0.7004, 0.8429, 0.8971],
# [-0.0272, 0.4722, 1.1621]])
# torch.Size([2, 3])


# Concatenation
y = torch.cat([x, x], dim=0) # concat on a specified dimension
print (y)
print (y.shape)

# Output
# tensor([[-0.7004, 0.8429, 0.8971],
# [-0.0272, 0.4722, 1.1621],
# [-0.7004, 0.8429, 0.8971],
# [-0.0272, 0.4722, 1.1621]])
# torch.Size([4, 3])

# Stacking
z = torch.stack([x, x], dim=0) # stack on new dimension
print (z)
print (z.shape)

# Output
# tensor([[[-0.7004, 0.8429, 0.8971],
# [-0.0272, 0.4722, 1.1621]],
#
# [[-0.7004, 0.8429, 0.8971],
# [-0.0272, 0.4722, 1.1621]]])
# torch.Size([2, 2, 3])

Gradients

We can use gradient bookkeeping to compute the gradient (rate of change) of a tensor with respect to the tensors it was built from.

Gradients are the single most important concept in machine learning and deep learning. We will cover them in more depth later in this series; for now, here is a quick example of how PyTorch computes the gradient of a function at a point (below, x is a 3×4 tensor, so N = 3 × 4 = 12):

$$
y = 3x + 2
$$

$$
z = \sum(y/N)
$$

$$
\frac{\partial(z)}{\partial(x)} = \frac{\partial(z)}{\partial(y)} \cdot \frac{\partial(y)}{\partial(x)} = \frac{1}{N} \cdot 3 = \frac{1}{3 \cdot 4} \cdot 3 = 0.25
$$

# Tensors with gradient bookkeeping
x = torch.rand(3, 4, requires_grad=True) # requires_grad=True tells PyTorch to track gradients for this tensor
y = 3*x + 2
z = y.mean()
z.backward() # compute gradients via backpropagation
print (f"x: \n{x}")
print (f"x.grad: \n{x.grad}")

# Output
# x:
# tensor([[0.1154, 0.1101, 0.4831, 0.1580],
# [0.4459, 0.2242, 0.9525, 0.8113],
# [0.0387, 0.1512, 0.9678, 0.7512]], requires_grad=True)
# x.grad:
# tensor([[0.2500, 0.2500, 0.2500, 0.2500],
# [0.2500, 0.2500, 0.2500, 0.2500],
# [0.2500, 0.2500, 0.2500, 0.2500]])
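
Since bookkeeping has a cost, it is also worth knowing how to turn it off when gradients are not needed (for example, during inference). A minimal sketch, added here for completeness:

# Disable gradient bookkeeping when only forward values are needed
x = torch.rand(3, 4, requires_grad=True)
with torch.no_grad():
    y = 3*x + 2
print(y.requires_grad)  # False: no computation graph was recorded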

CUDA

We can use CUDA (NVIDIA's parallel computing platform and API) to load tensors onto a GPU for parallelized computation.

import torch

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

x = torch.rand(2,3)
print (x.is_cuda)
x = torch.rand(2,3).to(device)
print (x.is_cuda)

# Output
# False
# True (assuming a CUDA-capable GPU is available)
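
For completeness, tensors can also be created directly on the target device, and a tensor must be moved back to the CPU before converting it to NumPy. A small sketch (an added illustration) reusing the device variable from the snippet above:

x = torch.rand(2, 3, device=device)  # allocate directly on the GPU (if available)
print(x.is_cuda)

x_cpu = x.cpu()                      # move back to the CPU (needed before .numpy())
print(x_cpu.is_cuda)                 # False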

Citation

@article{madewithml,
    author       = {Goku Mohandas},
    title        = {PyTorch - Made With ML},
    howpublished = {\url{https://madewithml.com/}},
    year         = {2022}
}

Ending

With this, you have the essential PyTorch knowledge needed for the Way2AI journey. As you can see, PyTorch's basic operations are not that different from NumPy's.

In fact, NumPy and PyTorch interoperate directly: PyTorch provides a NumPy-compatible interface, so data can be converted from NumPy arrays to PyTorch tensors and back with ease. This lets you lean on NumPy's rich functionality for data pre- and post-processing while using PyTorch for deep learning.
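
A minimal sketch of that round trip, using the np and torch imports from the Setup section (note that torch.from_numpy and .numpy() share memory with the original object, so modifying one side also modifies the other):

a = np.random.rand(2, 3)        # NumPy array (float64)
t = torch.from_numpy(a)         # tensor sharing memory with a
print(t.dtype)                  # torch.float64

b = t.numpy()                   # back to a NumPy array, also sharing memory
print(type(b))                  # <class 'numpy.ndarray'>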

In general, use NumPy for routine numerical computation, array manipulation, and mathematical functions; use PyTorch when building, training, and deploying neural network models.

The official PyTorch website has everything else you need to know about PyTorch.