文件名:【PyTorch】多项式回归,德拉吉
【PyTorch】多项式回归
文章目录: 1. 模型与代码实现 / 1.1. 模型 / 1.2. 代码实现 / 1.2.1. 完整代码 / 1.2.2. 输出结果 / 2. Q&A / 2.1. 欠拟合与过拟合
1. 模型与代码实现 1.1. 模型 将多项式特征值预处理为线性模型的特征值。即
$$y = w_0 + w_1 x + w_2 x^2 + \dots + w_n x^n$$
变换为
$$y = w_0 + w_1 z_1 + w_2 z_2 + \dots + w_n z_n$$
其中 $z_i = x^i$。为了避免指数值过大,可以将 $x^i$ 调整为 $\frac{x^i}{i!}$,即
$$y = w_0 + w_1\frac{x}{1!} + w_2\frac{x^2}{2!} + \dots + w_n\frac{x^n}{n!}$$
1.2. 代码实现 1.2.1. 完整代码
import math
import os

import numpy as np
import torch
import torch.nn as nn
from d2l import torch as d2l
from rich.progress import track
from tensorboardX import SummaryWriter
from torch.utils.data import DataLoader, TensorDataset
def evaluate_loss(dataloader):
    """Return the mean per-element loss of the model over ``dataloader``.

    Uses the module-level ``net``, ``criterion``, ``device`` and ``metric``
    objects that the ``__main__`` section creates. ``criterion`` is built
    with ``reduction='none'``, so the loss tensor holds one value per
    element; the sum and the element count are accumulated separately and
    divided at the end.

    Args:
        dataloader: Iterable yielding ``(X, y)`` tensor batches.

    Returns:
        Accumulated loss sum divided by the accumulated element count.
    """
    metric.reset()
    with torch.no_grad():
        for X, y in dataloader:
            X = X.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            loss = criterion(net(X), y)
            metric.add(loss.sum(), loss.numel())
    return metric[0] / metric[1]


def load_dataset(data_arrays):
    """Wrap tensors in a shuffling ``DataLoader``.

    Reads the module-level ``batch_size``, ``num_workers`` and
    ``prefetch_factor`` settings.

    Args:
        data_arrays: Sequence of tensors sharing the same first dimension;
            they are unpacked into a ``TensorDataset``.

    Returns:
        A ``DataLoader`` over the dataset with ``shuffle=True``.
    """
    dataset = TensorDataset(*data_arrays)
    loader_kwargs = {
        "shuffle": True,
        # Pinned host memory only helps host-to-GPU copies; requesting it
        # on a CPU-only machine just produces a warning.
        "pin_memory": torch.cuda.is_available(),
        "num_workers": num_workers,
    }
    # DataLoader rejects an explicit prefetch_factor when loading happens in
    # the main process (num_workers == 0) on PyTorch >= 2.0, so only forward
    # it when worker processes are actually used.
    if num_workers > 0:
        loader_kwargs["prefetch_factor"] = prefetch_factor
    return DataLoader(dataset, batch_size, **loader_kwargs)


if __name__ == '__main__':
    # Run configuration.
    learning_rate = 0.01
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_epochs = 400
    batch_size = 10
    num_workers = 0
    prefetch_factor = 2
    max_degree = 20   # highest power generated for every sample
    model_degree = 1  # highest power fed to the model
    n_train, n_test = 100, 100

    # Ground truth: only the first four coefficients are non-zero, i.e. the
    # labels come from a 3rd-degree polynomial.
    true_w = np.zeros(max_degree + 1)
    true_w[0:4] = np.array([5, 1.2, -3.4, 5.6])

    def get_logdir():
        """Return a log directory path numbered after the entries in ``runs``."""
        root = 'runs'
        os.makedirs(root, exist_ok=True)
        order = len(os.listdir(root)) + 1
        return f'runs/exp{order}'

    writer = SummaryWriter(get_logdir())
    try:
        # Synthetic data: x ~ N(0, 1), features x^i / i! for i = 0..max_degree.
        features = np.random.normal(size=(n_train + n_test, 1))
        np.random.shuffle(features)
        poly_features = np.power(
            features, np.arange(max_degree + 1).reshape(1, -1))
        # gamma(i + 1) == i!; dividing keeps high powers from blowing up.
        factorials = np.array(
            [math.gamma(i + 1) for i in range(max_degree + 1)])
        poly_features /= factorials
        labels = np.dot(poly_features, true_w)
        labels += np.random.normal(scale=0.1, size=labels.shape)
        poly_features, labels = [
            torch.as_tensor(x, dtype=torch.float32)
            for x in [poly_features, labels]
        ]

        # Column 0 of the features is x^0 == 1, so it plays the role of the
        # bias term and the linear layer itself is created without one.
        net = nn.Sequential(
            nn.Linear(model_degree + 1, 1, bias=False)
        ).to(device, non_blocking=True)

        def init_weights(m):
            """Initialise linear-layer weights from N(0, 0.01^2)."""
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0, std=0.01)

        net.apply(init_weights)
        # reduction='none' keeps per-element losses so evaluate_loss can
        # accumulate an exact sum and count.
        criterion = nn.MSELoss(reduction='none')
        optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)

        # Only the first model_degree + 1 feature columns are given to the model.
        features_train = poly_features[:n_train, :model_degree + 1]
        labels_train = labels[:n_train].reshape(-1, 1)
        features_test = poly_features[n_train:, :model_degree + 1]
        labels_test = labels[n_train:].reshape(-1, 1)
        dataloader_train = load_dataset((features_train, labels_train))
        dataloader_test = load_dataset((features_test, labels_test))

        metric = d2l.Accumulator(2)
        for epoch in track(range(num_epochs)):
            for X, y in dataloader_train:
                X = X.to(device, non_blocking=True)
                y = y.to(device, non_blocking=True)
                loss = criterion(net(X), y)
                optimizer.zero_grad()
                loss.mean().backward()
                optimizer.step()
            # Log train and test loss once per epoch.
            writer.add_scalars(
                f"{model_degree}-degree",
                {
                    "train_loss": evaluate_loss(dataloader_train),
                    "test_loss": evaluate_loss(dataloader_test),
                },
                epoch,
            )
        print("weights =", net[0].weight.data.cpu().numpy())
    finally:
        # Close the event file even if training is interrupted.
        writer.close()

# Article section heading: 1.2.2. 输出结果 (output results)
采用1阶多项式(线性模型)拟合:
采用3阶多项式拟合
采用20阶多项式拟合
2. Q&A 2.1. 欠拟合与过拟合
数据集是按照3阶多项式生成的。使用1阶多项式去拟合,发现最后损失始终降不下去,这种情况称为欠拟合,说明模型复杂度不够;使用20阶多项式去拟合,发现测试损失最后还增长了,训练和测试损失总体也比3阶多项式模型的值高,这种情况称为过拟合,说明模型太复杂了,训练过程受到了噪声的影响。