{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2026-03-19T05:45:23.532371843Z",
"start_time": "2026-03-19T05:45:23.227305138Z"
}
},
"source": [
"import torch\n",
"import numpy\n",
"import pandas"
],
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "torch.randn(3,4,2)",
"id": "3e141a42d342fa96",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"X = torch.arange(12, dtype=torch.float32).reshape((3,4))\n",
"Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])\n",
"torch.cat((X, Y), dim=0), torch.cat((X, Y), dim=1)"
],
"id": "8ae20ae68abbf32f",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"a = torch.arange(3).reshape((3, 1))\n",
"b = torch.arange(2).reshape((1, 2))\n",
"a, b\n",
"a+b"
],
"id": "2960a1ded2cdd5a4",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "X[-1], X[1:3]\n",
"id": "69c2ec23ab6ae97c",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"A = X.numpy()\n",
"B = torch.tensor(A)\n",
"type(A), type(B)"
],
"id": "b8d779a1bc7e4b1a",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import os\n",
"os.makedirs(os.path.join(\"..\",\"data\"),exist_ok=True)\n",
"data_file = os.path.join(\"..\",\"data\",\"data.csv\")\n",
"with open(data_file, \"w\") as f:\n",
" f.write('NumRooms,Alley,Price\\n') # 列名\n",
" f.write('NA,Pave,127500\\n') # 每行表示一个数据样本\n",
" f.write('2,NA,106000\\n')\n",
" f.write('4,NA,178100\\n')\n",
" f.write('NA,NA,140000\\n')\n",
"\n"
],
"id": "82be028b0f1dd1e3",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"data = pd.read_csv(data_file)\n",
"print(data)\n"
],
"id": "ddd789a2656899d1",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]\n",
"\n",
"\n",
"inputs = pd.get_dummies(inputs, dummy_na=True)\n",
"print(inputs)\n",
"inputs = inputs.fillna(inputs.mean())\n",
"print(inputs)\n"
],
"id": "e98fcc3bd4f067cf",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"X = torch.tensor(inputs.to_numpy(dtype=float))\n",
"y = torch.tensor(outputs.to_numpy(dtype=float))\n",
"X, y\n"
],
"id": "8ff0f7b40f0e4996",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"B=torch.tensor([[1,2,3],[2,0,4],[3,4,5]])\n",
"B"
],
"id": "91a6e0da442b95a0",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "B==B.T",
"id": "297e6a678fb19be7",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"X=torch.arange(24).reshape(2,3,4)\n",
"X"
],
"id": "24e864b336beb58b",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"A = torch.arange(20, dtype=torch.float32).reshape(5, 4)\n",
"B = A.clone() # 通过分配新内存将A的一个副本分配给B\n",
"A, A + B\n",
"#A = torch.arange(20, dtype=torch.float32).reshape(5, 4)\n",
"#B = A # 通过分配新内存将A的一个副本分配给B\n",
"id(A),id(B)"
],
"id": "ee0905479b1dbc2b",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Hadamard乘积",
"id": "136459f5efe765cf"
},
{
"metadata": {},
"cell_type": "code",
"source": "A*B",
"id": "f576b0df17cc0e98",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"a=2\n",
"X=torch.arange(24).reshape(2,3,4)\n",
"a+X,(a*X).shape"
],
"id": "b2373af1d7f2a45",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"print(A)\n",
"A_sum_axis0=A.sum(axis=0)\n",
"A_sum_axis1=A.sum(axis=1)\n",
"A_sum_axis0,A_sum_axis1"
],
"id": "2b50246e1ca8a3bc",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x=torch.arange(4,dtype=torch.float32)\n",
"torch.mv(A,x)"
],
"id": "3195464dfeb554ed",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import time\n",
"\n",
"def showtime(func):\n",
" def wrapper():\n",
" start = time.time()\n",
" result = func() # 执行原始函数\n",
" end = time.time()\n",
" print(f\"执行时间: {end - start:.6f}秒\")\n",
" return result\n",
" return wrapper # 返回包装函数\n",
"\n",
"@showtime\n",
"def fun():\n",
" print(\"I am silly\")\n",
"\n",
"fun()\n"
],
"id": "ebda8c74ead3e42b",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "torch.norm(torch.ones((4, 9)))",
"id": "3343cc0c01d0161c",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x =torch.arange(4.0,requires_grad=True)\n",
"x.grad"
],
"id": "674e2416e9417cfe",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"y=2*torch.dot(x,x)\n",
"y"
],
"id": "66c0febebcf98cde",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"y.backward()\n",
"x.grad"
],
"id": "825f2ce6c46ca4a8",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x.grad.zero_()\n",
"y = x.sum()\n",
"y.backward()\n",
"x.grad\n"
],
"id": "df399463515e9d3c",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# 对非标量调用backward需要传入一个gradient参数该参数指定微分函数关于self的梯度。\n",
"# 本例只想求偏导数的和所以传递一个1的梯度是合适的\n",
"x.grad.zero_()\n",
"y = x * x\n",
"# 等价于y.backward(torch.ones(len(x)))\n",
"print(y)\n",
"y.sum().backward()\n",
"x.grad"
],
"id": "f9207619bd4b3de8",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "torch.ones(len(x))",
"id": "409c14c230570859",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x.grad.zero_()\n",
"y=x*x\n",
"u=y.detach()\n",
"z=u*x\n",
"z.sum().backward()\n",
"x.grad==u"
],
"id": "521b948fe0683b12",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x.grad.zero_()\n",
"y.sum().backward()\n",
"x.grad==2*x"
],
"id": "b040beecf0632315",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"from torch.distributions import multinomial\n",
"fair_probs=torch.ones([6])\n",
"fair_probs"
],
"id": "4e6ec763dbea5aa3",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "multinomial.Multinomial(1, fair_probs).sample()",
"id": "f12d5e85bc6ab595",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"counts = multinomial.Multinomial(10, fair_probs).sample((500,))\n",
"\n",
"cum_counts = counts.cumsum(dim=0)\n",
"cum_counts.size()"
],
"id": "b02f43376fd6f1fe",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# 假设 estimates 是你的数据张量\n",
"estimates = cum_counts / cum_counts.sum(dim=1, keepdims=True)\n",
"\n",
"# 设置图形大小 (等效于 d2l.set_figsize)\n",
"plt.figure(figsize=(6, 4.5))\n",
"\n",
"# 绘制每条概率曲线\n",
"for i in range(6):\n",
" plt.plot(estimates[:, i].numpy(),\n",
" label=f\"P(die={i + 1})\") # 使用 f-string 更简洁\n",
"\n",
"# 添加理论概率水平线\n",
"plt.axhline(y=0.167, color='black', linestyle='dashed', label='Theoretical probability')\n",
"\n",
"# 设置坐标轴标签\n",
"plt.xlabel('Groups of experiments')\n",
"plt.ylabel('Estimated probability')\n",
"\n",
"# 添加图例\n",
"plt.legend()\n",
"\n",
"# 显示图形\n",
"plt.show()\n",
"#plt.savefig('dice_probability.png', bbox_inches='tight')"
],
"id": "8b80daa4edd0b066",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import numpy as np\n",
"class Timer:\n",
" \"\"\"记录多次运行时间\"\"\"\n",
" def __init__(self):\n",
" self.times = []\n",
" self.start()\n",
" def start(self):\n",
" \"\"\"启动计时器\"\"\"\n",
" self.tik = time.time()\n",
" def stop(self):\n",
" \"\"\"停止计时器并将时间记录在列表中\"\"\"\n",
" self.times.append(time.time() - self.tik)\n",
" return self.times[-1]\n",
" def avg(self):\n",
" \"\"\"返回平均时间\"\"\"\n",
" return sum(self.times) / len(self.times)\n",
" def sum(self):\n",
" \"\"\"返回时间总和\"\"\"\n",
" return sum(self.times)\n",
" def cumsum(self):\n",
" \"\"\"返回累计时间\"\"\"\n",
" return np.array(self.times).cumsum().tolist()"
],
"id": "4bdbb4999907154a",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"n = 10000\n",
"a = torch.ones([n])\n",
"b = torch.ones([n])\n",
"c=torch.zeros(n)\n",
"timer = Timer()\n",
"for i in range(n):\n",
" c[i]=a[i]+b[i]\n",
"f'{timer.stop():.5f} sec'"
],
"id": "c6f71622e2cc578a",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"timer.start()\n",
"d=a+b\n",
"f'{timer.stop():.5f} sec'"
],
"id": "2578c79b1214a79f",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import math\n",
"def normal(x, mu, sigma):\n",
" p = 1 / math.sqrt(2 * math.pi * sigma**2)\n",
" return p * np.exp(-0.5 / sigma**2 * (x - mu)**2)"
],
"id": "fd17fdbe38a5f79",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"from matplotlib_inline import backend_inline\n",
"def use_svg_display(): #@save\n",
" \"\"\"使用svg格式在Jupyter中显示绘图\"\"\"\n",
" backend_inline.set_matplotlib_formats('svg')\n",
"def set_figsize(figsize=(3.5, 2.5)): #@save\n",
" \"\"\"设置matplotlib的图表大小\"\"\"\n",
" use_svg_display()\n",
" plt.rcParams['figure.figsize'] = figsize\n",
"def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):\n",
" \"\"\"设置matplotlib的轴\"\"\"\n",
" axes.set_xlabel(xlabel)\n",
" axes.set_ylabel(ylabel)\n",
" axes.set_xscale(xscale)\n",
" axes.set_yscale(yscale)\n",
" axes.set_xlim(xlim)\n",
" axes.set_ylim(ylim)\n",
" if legend:\n",
" axes.legend(legend)\n",
" axes.grid()\n",
"def plot(X, Y=None, xlabel=None, ylabel=None, legend=None, xlim=None,\n",
"ylim=None, xscale='linear', yscale='linear',\n",
"fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):\n",
" \"\"\"绘制数据点\"\"\"\n",
" if legend is None:\n",
" legend = []\n",
" set_figsize(figsize)\n",
" axes = axes if axes else plt.gca()\n",
" # 如果X有一个轴输出True\n",
" def has_one_axis(X):\n",
" return (hasattr(X, \"ndim\") and X.ndim == 1 or isinstance(X, list)\n",
"and not hasattr(X[0], \"__len__\"))\n",
" if has_one_axis(X):\n",
" X = [X]\n",
" if Y is None:\n",
" X, Y = [[]] * len(X), X\n",
" elif has_one_axis(Y):\n",
" Y = [Y]\n",
" if len(X) != len(Y):\n",
" X = X * len(Y)\n",
" axes.cla()\n",
" for x, y, fmt in zip(X, Y, fmts):\n",
" if len(x):\n",
" axes.plot(x, y, fmt)\n",
" else:\n",
" axes.plot(y, fmt)\n",
" set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend)"
],
"id": "82158a69cba14da0",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# 再次使用numpy进行可视化\n",
"x = np.arange(-7, 7, 0.01)\n",
"# 均值和标准差对\n",
"params = [(0, 1), (0, 2), (3, 1)]\n",
"plot(x, [normal(x, mu, sigma) for mu, sigma in params], xlabel='x',\n",
"ylabel='p(x)', figsize=(4.5, 2.5),\n",
"legend=[f'mean {mu}, std {sigma}' for mu, sigma in params])"
],
"id": "f69ac10ebc3d13d8",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"#注意一下matmul做向量乘上矩阵的时候不用考虑转置的情况\n",
"def synthetic_data(w, b, num_examples): #@save\n",
" \"\"\"生成y=Xw+b+噪声\"\"\"\n",
" X = torch.normal(0, 1, (num_examples, len(w)))\n",
" y = torch.matmul(X, w) + b\n",
" y += torch.normal(0, 0.01, y.shape)\n",
" return X, y.reshape((-1, 1))\n"
],
"id": "7ed837bdd2b3a26d",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"true_w = torch.tensor([2, -3.4])\n",
"true_b = 4.2\n",
"features, labels = synthetic_data(true_w, true_b, 1000)"
],
"id": "5ec2e204a6fd5cb2",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"set_figsize()\n",
"plt.scatter(features[:, (1)].detach().numpy(), labels.detach().numpy(), 1)"
],
"id": "38213d46b3d9900d",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"w=torch.normal(0,0.01,size=(2,1),requires_grad=True)\n",
"b=torch.zeros(1,requires_grad=True)\n",
"def linreg(X, w, b):\n",
" return torch.matmul(X,w)+b\n",
"def squared_loss(y_hat,y):\n",
" return (y_hat-y.reshape(y_hat.shape))**2/2\n",
"def sgd(params,lr,batch_size):\n",
" with torch.no_grad():\n",
" for param in params:\n",
" param-=lr*param.grad/batch_size\n",
" param.grad.zero_()\n",
"lr = 0.03\n",
"num_epochs =20\n",
"net = linreg\n",
"loss = squared_loss"
],
"id": "12166e1bc3ddd695",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import random\n",
"def data_iter(batch_size, features, labels):\n",
" num_examples = len(features)\n",
" indices = list(range(num_examples))\n",
" # 这些样本是随机读取的,没有特定的顺序\n",
" random.shuffle(indices)\n",
" for i in range(0, num_examples, batch_size):\n",
" batch_indices = torch.tensor(\n",
" indices[i: min(i + batch_size, num_examples)])\n",
" yield features[batch_indices], labels[batch_indices]"
],
"id": "f3b7ee9f326bc687",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"batch_size =10\n",
"for X,y in data_iter(batch_size, features, labels):\n",
" print(X,'\\n',y)\n",
" break"
],
"id": "f386e12d65afff2e",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"for epoch in range(num_epochs):\n",
" for X, y in data_iter(batch_size, features, labels):\n",
" l=loss(net(X, w, b), y)\n",
" l.sum().backward()\n",
" sgd([w,b],lr,batch_size)\n",
" with torch.no_grad():\n",
" train_l =loss(net(features, w, b), labels)\n",
" print(f'epoch {epoch+1}, train loss: {float(train_l.mean()):.3f}')"
],
"id": "8888ab6adcec36f1",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"print(f'w的估计误差: {true_w - w.reshape(true_w.shape)}')\n",
"print(f'b的估计误差: {true_b - b}')"
],
"id": "8199439fa7f26309",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"from torch.utils import data\n",
"true_w = torch.tensor([2,-3.4])\n",
"true_b = 4.2\n",
"features,labels=synthetic_data(true_w, true_b, 1000)\n",
"def load_array(data_arrays,batch_size,is_train=True):\n",
" dataset = data.TensorDataset(*data_arrays)\n",
" return data.DataLoader(dataset,batch_size,shuffle=is_train)\n",
"batch_size = 10\n",
"data_iter = load_array((features,labels),batch_size)"
],
"id": "560d537dcbb5a335",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"from torch import nn\n",
"net = nn.Sequential(nn.Linear(2, 1))\n",
"net[0].weight.data.normal_(0,0.001)\n",
"net[0].bias.data.fill_(0)"
],
"id": "c54fe059d6fd20de",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"loss = nn.MSELoss()\n",
"trainer = torch.optim.SGD(net.parameters(), lr=0.01)\n",
"num_epochs = 3\n",
"for epoch in range(num_epochs):\n",
" for X, y in data_iter:\n",
" l = loss(net(X) ,y)\n",
" trainer.zero_grad()\n",
" l.backward()\n",
" trainer.step()\n",
" l = loss(net(features), labels)\n",
" print(f'epoch {epoch + 1}, loss {l:f}')\n"
],
"id": "e8a44851125b7cc6",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import torchvision\n",
"from torchvision import transforms\n",
"trans =transforms.ToTensor()\n",
"mnist_train = torchvision.datasets.FashionMNIST(root=\"./data\",train=True,transform=trans,download=False)\n",
"mnist_test = torchvision.datasets.FashionMNIST(root=\"./data\",train=False,transform=trans,download=False)\n"
],
"id": "bd4e8a65ccd03177",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"use_svg_display()\n",
"len(mnist_train),len(mnist_test)"
],
"id": "ed2c915af7f6a76a",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "mnist_train[0][0].shape",
"id": "4df1cbc292aa5981",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def get_fashion_mnist_labels(labels):\n",
" text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',\n",
"'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']\n",
" return [text_labels[int(i)] for i in labels]\n"
],
"id": "332f3d6da0bafbe8",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def show_images(imgs, num_rows, num_cols, titles=None, scale=1): #@save\n",
" \"\"\"绘制图像列表\"\"\"\n",
" figsize = (num_cols * scale, num_rows * scale)\n",
" _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)\n",
" axes = axes.flatten()\n",
" for i, (ax, img) in enumerate(zip(axes, imgs)):\n",
" if torch.is_tensor(img):\n",
" # 图片张量\n",
" ax.imshow(img.numpy())\n",
" else:\n",
" # PIL图片\n",
" ax.imshow(img)\n",
" ax.axes.get_xaxis().set_visible(False)\n",
" ax.axes.get_yaxis().set_visible(False)\n",
" if titles:\n",
" ax.set_title(titles[i])\n",
" return axes"
],
"id": "c83202fe9b0ab487",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"X, y = next(iter(data.DataLoader(mnist_train, batch_size=18)))\n",
"print(X.shape)\n",
"show_images(X.reshape(18, 28, 28), 2, 9, titles=get_fashion_mnist_labels(y));"
],
"id": "cf4abd8370d55416",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"batch_size = 256\n",
"def get_dataloader_workers():\n",
" \"\"\"使用4个进程来读取数据\"\"\"\n",
" return 4\n",
"\n",
"train_iter = data.DataLoader(mnist_train, batch_size, shuffle=True,\n",
"num_workers=get_dataloader_workers())\n",
"timer = Timer()\n",
"for X, y in train_iter:\n",
" continue\n",
"f'{timer.stop():.2f} sec'"
],
"id": "552769fbffc16142",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def load_data_fashion_mnist(batch_size, resize=None):\n",
" \"\"\"下载Fashion-MNIST数据集然后将其加载到内存中\"\"\"\n",
" trans = [transforms.ToTensor()]\n",
" if resize:\n",
" trans.insert(0, transforms.Resize(resize))\n",
" trans = transforms.Compose(trans)\n",
" mnist_train = torchvision.datasets.FashionMNIST(\n",
" root=\"./data\", train=True, transform=trans, download=False)\n",
" mnist_test = torchvision.datasets.FashionMNIST(\n",
" root=\"./data\", train=False, transform=trans, download=False)\n",
" return (data.DataLoader(mnist_train, batch_size, shuffle=True,\n",
" num_workers=get_dataloader_workers()),\n",
" data.DataLoader(mnist_test, batch_size, shuffle=False,\n",
" num_workers=get_dataloader_workers()))"
],
"id": "aa81880abd86cae6",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"train_iter, test_iter = load_data_fashion_mnist(32, resize=64)\n",
"for X, y in train_iter:\n",
" print(X.shape, X.dtype, y.shape, y.dtype)\n",
" break"
],
"id": "4248a103f745154",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"from IPython import display\n",
"batch_size = 256\n",
"train_iter, test_iter = load_data_fashion_mnist(batch_size)\n",
"num_inputs = 784\n",
"num_outputs = 10\n",
"W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)\n",
"b = torch.zeros(num_outputs, requires_grad=True)\n",
"X = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])\n",
"X.sum(0, keepdim=True), X.sum(1, keepdim=True)\n"
],
"id": "94c52f0cca88ef48",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def softmax(X):\n",
" X_exp = torch.exp(X)\n",
" partition = X_exp.sum(1, keepdim=True)\n",
" return X_exp / partition # 这里应用了广播机制\n",
"X = torch.normal(0, 1, (2, 5))\n",
"X_prob = softmax(X)\n",
"X_prob, X_prob.sum(1)"
],
"id": "c4ab34373c5a664e",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def net(X):\n",
" return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)"
],
"id": "6eacc53b2b9738af",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"y = torch.tensor([0, 2])\n",
"y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])\n",
"y_hat[[0, 1], y]"
],
"id": "698449b4dafb545c",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def cross_entropy(y_hat, y):\n",
" return - torch.log(y_hat[range(len(y_hat)), y])\n",
"cross_entropy(y_hat, y)"
],
"id": "1720369fc8568c8c",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def accuracy(y_hat, y): #@save\n",
" \"\"\"计算预测正确的数量\"\"\"\n",
" if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:\n",
" y_hat = y_hat.argmax(axis=1)\n",
" cmp = y_hat.type(y.dtype) == y\n",
" return float(cmp.type(y.dtype).sum())\n",
"\n",
"accuracy(y_hat, y)/len(y)"
],
"id": "e65719500a64ed87",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"class Accumulator: #@save\n",
" \"\"\"在n个变量上累加\"\"\"\n",
" def __init__(self, n):\n",
" self.data = [0.0] * n\n",
" def add(self, *args):\n",
" self.data = [a + float(b) for a, b in zip(self.data, args)]\n",
" def reset(self):\n",
" self.data = [0.0] * len(self.data)\n",
" def __getitem__(self, idx):\n",
" return self.data[idx]\n"
],
"id": "f1eebb35ff2e9fea",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def evaluate_accuracy(net, data_iter): #@save\n",
" \"\"\"计算在指定数据集上模型的精度\"\"\"\n",
" if isinstance(net, torch.nn.Module):\n",
" net.eval() # 将模型设置为评估模式\n",
" metric = Accumulator(2) # 正确预测数、预测总数\n",
" with torch.no_grad():\n",
" for X, y in data_iter:\n",
" metric.add(accuracy(net(X), y), y.numel())\n",
" return metric[0] / metric[1]\n"
],
"id": "bc2beb5f2d6afe7e",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "evaluate_accuracy(net, test_iter)",
"id": "65bcfb7e40c1a98b",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def train_epoch_ch3(net, train_iter, loss, updater): #@save\n",
" \"\"\"训练模型一个迭代周期定义见第3章\"\"\"\n",
" # 将模型设置为训练模式\n",
" if isinstance(net, torch.nn.Module):\n",
" net.train()\n",
" # 训练损失总和、训练准确度总和、样本数\n",
" metric = Accumulator(3)\n",
" for X, y in train_iter:\n",
" # 计算梯度并更新参数\n",
" y_hat = net(X)\n",
" l = loss(y_hat, y)\n",
" if isinstance(updater, torch.optim.Optimizer):\n",
" # 使用PyTorch内置的优化器和损失函数\n",
" updater.zero_grad()\n",
" l.mean().backward()\n",
" updater.step()\n",
" else:\n",
" # 使用定制的优化器和损失函数\n",
" l.sum().backward()\n",
" updater(X.shape[0])\n",
" metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())\n",
"# 返回训练损失和训练精度\n",
" return metric[0] / metric[2], metric[1] / metric[2]"
],
"id": "2faf1dcc6c023a53",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"class Animator: #@save\n",
" \"\"\"在动画中绘制数据\"\"\"\n",
" def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,\n",
" ylim=None, xscale='linear', yscale='linear',\n",
" fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,\n",
" figsize=(3.5, 2.5)):\n",
" # 增量地绘制多条线\n",
" if legend is None:\n",
" legend = []\n",
" use_svg_display()\n",
" self.fig, self.axes = plt.subplots(nrows, ncols, figsize=figsize)\n",
" if nrows * ncols == 1:\n",
" self.axes = [self.axes, ]\n",
" # 使用lambda函数捕获参数\n",
" self.config_axes = lambda: set_axes(\n",
" self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)\n",
" self.X, self.Y, self.fmts = None, None, fmts\n",
" def add(self, x, y):\n",
" # 向图表中添加多个数据点\n",
" if not hasattr(y, \"__len__\"):\n",
" y = [y]\n",
" n = len(y)\n",
" if not hasattr(x, \"__len__\"):\n",
" x = [x] * n\n",
" if not self.X:\n",
" self.X = [[] for _ in range(n)]\n",
" if not self.Y:\n",
" self.Y = [[] for _ in range(n)]\n",
" for i, (a, b) in enumerate(zip(x, y)):\n",
" if a is not None and b is not None:\n",
" self.X[i].append(a)\n",
" self.Y[i].append(b)\n",
" self.axes[0].cla()\n",
" for x, y, fmt in zip(self.X, self.Y, self.fmts):\n",
" self.axes[0].plot(x, y, fmt)\n",
" self.config_axes()\n",
" display.display(self.fig)\n",
" display.clear_output(wait=True)\n"
],
"id": "7cd5367ab43c5e5f",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater): #@save\n",
" \"\"\"训练模型定义见第3章\"\"\"\n",
" animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],\n",
" legend=['train loss', 'train acc', 'test acc'])\n",
" for epoch in range(num_epochs):\n",
" train_metrics = train_epoch_ch3(net, train_iter, loss, updater)\n",
" test_acc = evaluate_accuracy(net, test_iter)\n",
" animator.add(epoch + 1, train_metrics + (test_acc,))\n",
" train_loss, train_acc = train_metrics\n",
" assert train_loss < 0.5, train_loss\n",
" assert train_acc <= 1 and train_acc > 0.7, train_acc\n",
" assert test_acc <= 1 and test_acc > 0.7, test_acc"
],
"id": "b02a143c75fad40",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"lr = 0.1\n",
"def updater(batch_size):\n",
" return sgd([W, b], lr, batch_size)"
],
"id": "6a97b70779276b61",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"num_epochs = 10\n",
"#train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)"
],
"id": "df3cceb72faee402",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def predict_ch3(net, test_iter, n=6): #@save\n",
" \"\"\"预测标签定义见第3章\"\"\"\n",
" for (X, y),i in zip(test_iter,range(1)):\n",
" trues = get_fashion_mnist_labels(y)\n",
" preds = get_fashion_mnist_labels(net(X).argmax(axis=1))\n",
" titles = [true +'\\n' + pred for true, pred in zip(trues, preds)]\n",
" show_images(\n",
" X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])\n",
"\n",
"predict_ch3(net, test_iter)\n"
],
"id": "94f6177bfb40eece",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"batch_size = 256\n",
"train_iter, test_iter = load_data_fashion_mnist(batch_size)"
],
"id": "4a0bfd0479ec7386",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"net = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))\n",
"def init_weights(m):\n",
" if type(m) == nn.Linear:\n",
" nn.init.normal_(m.weight, std=0.01)\n",
"\n",
"net.apply(init_weights);\n",
"loss = nn.CrossEntropyLoss(reduction='none')\n",
"trainer = torch.optim.SGD(net.parameters(), lr=0.1)\n",
"num_epochs = 10\n",
"train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)"
],
"id": "b9808d88f5e6827b",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x = torch.arange(-8.0, 8.0, 0.1, requires_grad=True)\n",
"y = torch.relu(x)\n",
"plot(x.detach(), y.detach(), 'x', 'relu(x)', figsize=(5, 2.5))"
],
"id": "c25dd146307f58e0",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"y.backward(torch.ones_like(x), retain_graph=True)\n",
"plot(x.detach(), x.grad, 'x', 'grad of relu', figsize=(5, 2.5))"
],
"id": "f96acd2015dccb38",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"y = torch.sigmoid(x)\n",
"plot(x.detach(), y.detach(), 'x', 'sigmoid(x)', figsize=(5, 2.5))"
],
"id": "74013cea59cd8be3",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x.grad.data.zero_()\n",
"y.backward(torch.ones_like(x),retain_graph=True)\n",
"plot(x.detach(), x.grad, 'x', 'grad of sigmoid', figsize=(5, 2.5))"
],
"id": "6a0b4f529bf9cc5c",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"batch_size = 256\n",
"train_iter, test_iter = load_data_fashion_mnist(batch_size)"
],
"id": "f1de998439b1b9f",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"num_inputs,num_outputs,num_hiddens = 784, 10, 256\n",
"W1=nn.Parameter(torch.randn(num_inputs,num_hiddens,requires_grad=True)*0.01)\n",
"b1=nn.Parameter(torch.zeros(num_hiddens,requires_grad=True))\n",
"W2 = nn.Parameter(torch.randn(num_hiddens,num_outputs,requires_grad=True)*0.01)\n",
"b2=nn.Parameter(torch.zeros(num_outputs,requires_grad=True))\n",
"params=[W1,b1,W2,b2]\n"
],
"id": "adcea8cd4ee792a8",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def relu(X):\n",
" a = torch.zeros_like(X)\n",
" return torch.max(X,a)\n",
"def net(X):\n",
" X = X.reshape((-1,num_inputs))\n",
" H = relu(X@W1+b1)\n",
" return (H@W2+b2)\n",
"loss = nn.CrossEntropyLoss(reduction='none')\n",
"num_epochs,lr=10,0.05\n",
"updater=torch.optim.SGD(params,lr=lr)\n",
"#train_ch3(net,train_iter,test_iter,loss,num_epochs,updater)\n"
],
"id": "cfd81a0f16d0c573",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "predict_ch3(net, test_iter)",
"id": "f2ed2e9cee14c28a",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"net = nn.Sequential(nn.Flatten(),\n",
" nn.Linear(784,256),\n",
" nn.ReLU(),\n",
" nn.Linear(256,10))\n",
"def init_weights(m):\n",
" if type(m) == nn.Linear:\n",
" nn.init.normal_(m.weight,std=0.01)\n",
"\n",
"net.apply(init_weights)\n"
],
"id": "9e4ed6d103380bc7",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"batch_size,lr, num_epochs=256,0.1,10\n",
"loss = nn.CrossEntropyLoss(reduction='none')\n",
"trainer = torch.optim.SGD(net.parameters(),lr=lr)\n",
"train_iter, test_iter = load_data_fashion_mnist(batch_size)\n",
"#train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)"
],
"id": "52d71c77c4f51e90",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "",
"id": "94706c936b4be3e1",
"outputs": [],
"execution_count": null
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}