TL;DR

Way2AI series: making sure we have laid a solid foundation before setting out to "change the world".

This article briefly covers the essential knowledge of PyTorch, a machine learning framework.

Set up

First, we import the NumPy and PyTorch libraries and set random seeds for reproducibility.
Note that PyTorch requires its own seed, since we will be generating random tensors.

import numpy as np
import torch

SEED = 1024

# Set seed for reproducibility
np.random.seed(seed=SEED)
torch.manual_seed(SEED)

Basics

Below are some PyTorch basics, such as how to create tensors and how to convert common data structures (lists, arrays, etc.) into tensors.

# Creating a random tensor
x = torch.randn(2, 3) # normal distribution (rand(2,3) -> uniform distribution)
print(f"Type: {x.type()}")
print(f"Size: {x.shape}")
print(f"Values: \n{x}")

# Output
# Type: torch.FloatTensor
# Size: torch.Size([2, 3])
# Values:
# tensor([[-1.4837, 0.2671, -1.8337],
# [-0.1047, 0.6002, -0.5496]])


# Zero and Ones tensor
x = torch.zeros(2, 3)
print (x)
x = torch.ones(2, 3)
print (x)

# Output
# tensor([[0., 0., 0.],
# [0., 0., 0.]])
# tensor([[1., 1., 1.],
# [1., 1., 1.]])


# List → Tensor
x = torch.Tensor([[1, 2, 3],[4, 5, 6]])
print(f"Size: {x.shape}")
print(f"Values: \n{x}")

# Output
# Size: torch.Size([2, 3])
# Values:
# tensor([[1., 2., 3.],
# [4., 5., 6.]])


# NumPy array → Tensor
x = torch.Tensor(np.random.rand(2, 3))
print(f"Size: {x.shape}")
print(f"Values: \n{x}")

# Output
# Size: torch.Size([2, 3])
# Values:
# tensor([[0.4445, 0.3168, 0.9231],
# [0.4659, 0.7984, 0.1992]])


# Changing tensor type
x = torch.Tensor(3, 4)
print(f"Type: {x.type()}")
x = x.long()
print(f"Type: {x.type()}")

# Output
# Type: torch.FloatTensor
# Type: torch.LongTensor
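
One detail worth noting here (an added illustration, not part of the original examples): the lowercase torch.tensor constructor infers its dtype from the input data, whereas torch.Tensor produces a FloatTensor by default, so the two are not interchangeable.

# torch.tensor infers the dtype from the data; it can also be set explicitly
x = torch.tensor([[1, 2, 3], [4, 5, 6]])                       # integers → torch.LongTensor
print(f"Type: {x.type()}")
x = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32)  # torch.FloatTensor
print(f"Type: {x.type()}")

# Output
# Type: torch.LongTensor
# Type: torch.FloatTensor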

Operations

Let's explore some basic tensor operations.

# Addition
x = torch.randn(2, 3)
y = torch.randn(2, 3)
z = x + y
print(f"Size: {z.shape}")
print(f"Values: \n{z}")

# Output
# Size: torch.Size([2, 3])
# Values:
# tensor([[-0.4446, 0.4933, -1.4847],
# [ 0.8493, 0.6911, -0.3357]])


# Matrix multiplication
x = torch.randn(2, 3)
y = torch.randn(3, 2)
z = torch.mm(x, y)
print(f"Size: {z.shape}")
print(f"Values: \n{z}")

# Output
# Size: torch.Size([2, 2])
# Values:
# tensor([[ 0.2733, -4.0392],
# [ 1.6385, -4.7220]])


# Transpose
x = torch.randn(2, 3)
print(f"Size: {x.shape}")
print(f"Values: \n{x}")
y = torch.t(x)
print(f"Size: {y.shape}")
print(f"Values: \n{y}")

# Output
# Size: torch.Size([2, 3])
# Values:
# tensor([[ 0.5920, -0.6301, -0.8856],
# [ 1.2261, -0.4671, -1.0279]])
# Size: torch.Size([3, 2])
# Values:
# tensor([[ 0.5920, 1.2261],
# [-0.6301, -0.4671],
# [-0.8856, -1.0279]])


# Reshape
x = torch.randn(2, 3)
z = x.view(3, 2)
print(f"Size: {z.shape}")
print(f"Values: \n{z}")

# Output
# Size: torch.Size([3, 2])
# Values:
# tensor([[-1.0387, 0.1039],
# [ 0.5989, -1.4801],
# [-0.8618, -0.9181]])


# Dangers of reshaping (unintended consequences)
x = torch.tensor([
[[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]],
[[10, 10, 10, 10], [20, 20, 20, 20], [30, 30, 30, 30]]
])
print(f"Size: {x.shape}")
print(f"x: \n{x}\n")

a = x.view(x.size(1), -1)
print(f"\nSize: {a.shape}")
print(f"a: \n{a}\n")

b = x.transpose(0, 1).contiguous()
print(f"\nSize: {b.shape}")
print(f"b: \n{b}\n")

c = b.view(b.size(0), -1)
print(f"\nSize: {c.shape}")
print(f"c: \n{c}")

# Output
# Size: torch.Size([2, 3, 4])
# x:
# tensor([[[ 1, 1, 1, 1],
# [ 2, 2, 2, 2],
# [ 3, 3, 3, 3]],
#
# [[10, 10, 10, 10],
# [20, 20, 20, 20],
# [30, 30, 30, 30]]])
#
#
# Size: torch.Size([3, 8])
# a:
# tensor([[ 1, 1, 1, 1, 2, 2, 2, 2],
# [ 3, 3, 3, 3, 10, 10, 10, 10],
# [20, 20, 20, 20, 30, 30, 30, 30]])
#
#
# Size: torch.Size([3, 2, 4])
# b:
# tensor([[[ 1, 1, 1, 1],
# [10, 10, 10, 10]],
#
# [[ 2, 2, 2, 2],
# [20, 20, 20, 20]],
#
# [[ 3, 3, 3, 3],
# [30, 30, 30, 30]]])
#
#
# Size: torch.Size([3, 8])
# c:
# tensor([[ 1, 1, 1, 1, 10, 10, 10, 10],
# [ 2, 2, 2, 2, 20, 20, 20, 20],
# [ 3, 3, 3, 3, 30, 30, 30, 30]])


# Dimensional operations
x = torch.randn(2, 3)
print(f"Values: \n{x}")
y = torch.sum(x, dim=0) # add each row's value for every column
print(f"Values: \n{y}")
z = torch.sum(x, dim=1) # add each column's value for every row
print(f"Values: \n{z}")

# Output
# Values:
# tensor([[-0.0355, 0.4145, 0.6798],
# [-0.2936, 0.1872, -0.2724]])
# Values:
# tensor([-0.3292, 0.6017, 0.4074])
# Values:
# tensor([ 1.0588, -0.3788])
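
The same dim argument applies to other reductions as well. The short sketch below (added for illustration) shows keepdim, which preserves the reduced dimension, and torch.max, which returns both the values and their indices along a dimension.

# keepdim preserves the reduced dimension
x = torch.randn(2, 3)
y = torch.sum(x, dim=1, keepdim=True)
print(y.shape)  # torch.Size([2, 1]) instead of torch.Size([2])

# torch.max along a dimension returns both the values and their indices
values, indices = torch.max(x, dim=1)
print(values.shape, indices.shape)  # torch.Size([2]) torch.Size([2])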

Indexing

We can use indexing to extract specific values from a tensor.

x = torch.randn(3, 4)
print (f"x: \n{x}")
print (f"x[:1]: \n{x[:1]}")
print (f"x[:1, 1:3]: \n{x[:1, 1:3]}")

# Output
# x:
# tensor([[-0.5524, -0.8358, -2.8240, 0.2564],
# [ 0.5045, -1.1290, 0.7631, 1.0155],
# [-1.2475, -0.0335, 0.5442, 0.4280]])
# x[:1]:
# tensor([[-0.5524, -0.8358, -2.8240, 0.2564]])
# x[:1, 1:3]:
# tensor([[-0.8358, -2.8240]])

Slicing

# Select with dimensional indices
x = torch.randn(2, 3)
print(f"Values: \n{x}")

col_indices = torch.LongTensor([0, 2])
chosen = torch.index_select(x, dim=1, index=col_indices) # values from column 0 & 2
print(f"Values: \n{chosen}")

row_indices = torch.LongTensor([0, 1])
col_indices = torch.LongTensor([0, 2])
chosen = x[row_indices, col_indices] # values from (0, 0) & (1, 2)
print(f"Values: \n{chosen}")

# Output
# Values:
# tensor([[-1.6357, 0.7964, 0.9450],
# [-1.6535, 1.8129, 0.9162]])
# Values:
# tensor([[-1.6357, 0.9450],
# [-1.6535, 0.9162]])
# Values:
# tensor([-1.6357, 0.9162])
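
Boolean masking is another common way to pull out values; the small sketch below is an added illustration and was not part of the original set of examples.

# Boolean masking: select entries that satisfy a condition
x = torch.randn(2, 3)
mask = x > 0                          # bool tensor with the same shape as x
print(x[mask])                        # 1-D tensor holding the positive entries
print(torch.masked_select(x, mask))   # equivalent call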

Joining

We can also combine tensors using concatenation and stacking.

x = torch.randn(2, 3)
print (x)
print (x.shape)

# Output
# tensor([[-0.7004, 0.8429, 0.8971],
# [-0.0272, 0.4722, 1.1621]])
# torch.Size([2, 3])


# Concatenation
y = torch.cat([x, x], dim=0) # concat on a specified dimension
print (y)
print (y.shape)

# Output
# tensor([[-0.7004, 0.8429, 0.8971],
# [-0.0272, 0.4722, 1.1621],
# [-0.7004, 0.8429, 0.8971],
# [-0.0272, 0.4722, 1.1621]])
# torch.Size([4, 3])

# Stacking
z = torch.stack([x, x], dim=0) # stack on new dimension
print (z)
print (z.shape)

# Output
# tensor([[[-0.7004, 0.8429, 0.8971],
# [-0.0272, 0.4722, 1.1621]],
#
# [[-0.7004, 0.8429, 0.8971],
# [-0.0272, 0.4722, 1.1621]]])
# torch.Size([2, 2, 3])

Gradients

We can use gradient bookkeeping to compute the gradient (rate of change) of a tensor with respect to the tensors it was built from.

Gradients are the single most important concept in machine learning and deep learning. We will cover them in more depth later in this series; for now, here is a quick example of how PyTorch computes the gradient of a function at a point (below, x is a 3×4 tensor, so N = 3 × 4 = 12):

$$
y = 3x + 2
$$

$$
z = \sum(y/N)
$$

$$
\frac{\partial(z)}{\partial(x)} = \frac{\partial(z)}{\partial(y)} \cdot \frac{\partial(y)}{\partial(x)} = \frac{1}{N} \cdot 3 = \frac{1}{3 \cdot 4} \cdot 3 = 0.25
$$

# Tensors with gradient bookkeeping
x = torch.rand(3, 4, requires_grad=True) # requires_grad=True tells PyTorch to track gradients for this tensor
y = 3*x + 2
z = y.mean()
z.backward() # compute gradients via backpropagation
print (f"x: \n{x}")
print (f"x.grad: \n{x.grad}")

# Output
# x:
# tensor([[0.1154, 0.1101, 0.4831, 0.1580],
# [0.4459, 0.2242, 0.9525, 0.8113],
# [0.0387, 0.1512, 0.9678, 0.7512]], requires_grad=True)
# x.grad:
# tensor([[0.2500, 0.2500, 0.2500, 0.2500],
# [0.2500, 0.2500, 0.2500, 0.2500],
# [0.2500, 0.2500, 0.2500, 0.2500]])
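
Since bookkeeping has a cost, it is also worth knowing how to turn it off when gradients are not needed (for example, during inference). A minimal sketch, added here for completeness:

# Disable gradient bookkeeping when only forward values are needed
x = torch.rand(3, 4, requires_grad=True)
with torch.no_grad():
    y = 3*x + 2
print(y.requires_grad)  # False: no computation graph was recorded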

CUDA

We can use CUDA (NVIDIA's parallel computing platform and API) to load tensors onto a GPU for parallelized computation.

import torch

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

x = torch.rand(2,3)
print (x.is_cuda)
x = torch.rand(2,3).to(device)
print (x.is_cuda)

# Output
# False
# True (assuming a CUDA-capable GPU is available)
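
For completeness, tensors can also be created directly on the target device, and a tensor must be moved back to the CPU before converting it to NumPy. A small sketch (an added illustration) reusing the device variable from the snippet above:

x = torch.rand(2, 3, device=device)  # allocate directly on the GPU (if available)
print(x.is_cuda)

x_cpu = x.cpu()                      # move back to the CPU (needed before .numpy())
print(x_cpu.is_cuda)                 # False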

Citation

@article{madewithml,
    author       = {Goku Mohandas},
    title        = {PyTorch - Made With ML},
    howpublished = {\url{https://madewithml.com/}},
    year         = {2022}
}

Ending

With this, you have the essential PyTorch knowledge needed for the Way2AI journey. As you can see, PyTorch's basic operations are not that different from NumPy's.

In fact, NumPy and PyTorch interoperate directly: PyTorch provides a NumPy-compatible interface, so data can be converted from NumPy arrays to PyTorch tensors and back with ease. This lets you lean on NumPy's rich functionality for data pre- and post-processing while using PyTorch for deep learning.
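
A minimal sketch of that round trip, using the np and torch imports from the Setup section (note that torch.from_numpy and .numpy() share memory with the original object, so modifying one side also modifies the other):

a = np.random.rand(2, 3)        # NumPy array (float64)
t = torch.from_numpy(a)         # tensor sharing memory with a
print(t.dtype)                  # torch.float64

b = t.numpy()                   # back to a NumPy array, also sharing memory
print(type(b))                  # <class 'numpy.ndarray'>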

In general, use NumPy for routine numerical computation, array manipulation, and mathematical functions; use PyTorch when building, training, and deploying neural network models.

The official PyTorch website has everything else you need to know about PyTorch.