{ "cells": [ { "cell_type": "code", "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { "end_time": "2026-03-19T05:45:23.532371843Z", "start_time": "2026-03-19T05:45:23.227305138Z" } }, "source": [ "import torch\n", "import numpy\n", "import pandas" ], "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named 'torch'", "output_type": "error", "traceback": [ "\u001B[31m---------------------------------------------------------------------------\u001B[39m", "\u001B[31mModuleNotFoundError\u001B[39m Traceback (most recent call last)", "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[1]\u001B[39m\u001B[32m, line 1\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m1\u001B[39m \u001B[38;5;28;01mimport\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34;01mtorch\u001B[39;00m\n\u001B[32m 2\u001B[39m \u001B[38;5;28;01mimport\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34;01mnumpy\u001B[39;00m\n\u001B[32m 3\u001B[39m \u001B[38;5;28;01mimport\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34;01mpandas\u001B[39;00m\n", "\u001B[31mModuleNotFoundError\u001B[39m: No module named 'torch'" ] } ], "execution_count": 1 }, { "metadata": {}, "cell_type": "code", "source": "torch.randn(3,4,2)", "id": "3e141a42d342fa96", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "X = torch.arange(12, dtype=torch.float32).reshape((3,4))\n", "Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])\n", "torch.cat((X, Y), dim=0), torch.cat((X, Y), dim=1)" ], "id": "8ae20ae68abbf32f", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "a = torch.arange(3).reshape((3, 1))\n", "b = torch.arange(2).reshape((1, 2))\n", "a, b\n", "a+b" ], "id": "2960a1ded2cdd5a4", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": "X[-1], X[1:3]\n", "id": "69c2ec23ab6ae97c", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "A = X.numpy()\n", "B = torch.tensor(A)\n", "type(A), type(B)" ], "id": "b8d779a1bc7e4b1a", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "import os\n", "os.makedirs(os.path.join(\"..\",\"data\"),exist_ok=True)\n", "data_file = os.path.join(os.path.join(\"..\",\"data\",\"data.csv\"))\n", "with open(data_file, \"w\") as f:\n", " f.write('NumRooms,Alley,Price\\n') # 列名\n", " f.write('NA,Pave,127500\\n') # 每行表示一个数据样本\n", " f.write('2,NA,106000\\n')\n", " f.write('4,NA,178100\\n')\n", " f.write('NA,NA,140000\\n')\n", "\n" ], "id": "82be028b0f1dd1e3", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "import pandas as pd\n", "data = pd.read_csv(data_file)\n", "print(data)\n" ], "id": "ddd789a2656899d1", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]\n", "\n", "\n", "inputs = pd.get_dummies(inputs, dummy_na=True)\n", "print(inputs)\n", "inputs = inputs.fillna(inputs.mean())\n", "print(inputs)\n" ], "id": "e98fcc3bd4f067cf", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "X = torch.tensor(inputs.to_numpy(dtype=float))\n", "y = torch.tensor(outputs.to_numpy(dtype=float))\n", "X, y\n" ], "id": "8ff0f7b40f0e4996", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "B=torch.tensor([[1,2,3],[2,0,4],[3,4,5]])\n", "B" ], "id": "91a6e0da442b95a0", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": "B==B.T", "id": "297e6a678fb19be7", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "X=torch.arange(24).reshape(2,3,4)\n", "X" ], "id": "24e864b336beb58b", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "A = torch.arange(20, dtype=torch.float32).reshape(5, 4)\n", "B = A.clone() # 通过分配新内存,将A的一个副本分配给B\n", "A, A + B\n", "#A = torch.arange(20, dtype=torch.float32).reshape(5, 4)\n", "#B = A # 通过分配新内存,将A的一个副本分配给B\n", "id(A),id(B)" ], "id": "ee0905479b1dbc2b", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "markdown", "source": "Hadamard乘积", "id": "136459f5efe765cf" }, { "metadata": {}, "cell_type": "code", "source": "A*B", "id": "f576b0df17cc0e98", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "a=2\n", "X=torch.arange(24).reshape(2,3,4)\n", "a+X,(a*X).shape" ], "id": "b2373af1d7f2a45", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "print(A)\n", "A_sum_axis0=A.sum(axis=0)\n", "A_sum_axis1=A.sum(axis=1)\n", "A_sum_axis0,A_sum_axis1" ], "id": "2b50246e1ca8a3bc", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "x=torch.arange(4,dtype=torch.float32)\n", "torch.mv(A,x)" ], "id": "3195464dfeb554ed", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "import time\n", "\n", "def showtime(func):\n", " def wrapper():\n", " start = time.time()\n", " result = func() # 执行原始函数\n", " end = time.time()\n", " print(f\"执行时间: {end - start:.6f}秒\")\n", " return result\n", " return wrapper # 返回包装函数\n", "\n", "@showtime\n", "def fun():\n", " print(\"I am silly\")\n", "\n", "fun()\n" ], "id": "ebda8c74ead3e42b", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": "torch.norm(torch.ones((4, 9)))", "id": "3343cc0c01d0161c", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "x =torch.arange(4.0,requires_grad=True)\n", "x.grad" ], "id": "674e2416e9417cfe", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "y=2*torch.dot(x,x)\n", "y" ], "id": "66c0febebcf98cde", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "y.backward()\n", "x.grad" ], "id": "825f2ce6c46ca4a8", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "x.grad.zero_()\n", "y = x.sum()\n", "y.backward()\n", "x.grad\n" ], "id": "df399463515e9d3c", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "# 对非标量调用backward需要传入一个gradient参数,该参数指定微分函数关于self的梯度。\n", "# 本例只想求偏导数的和,所以传递一个1的梯度是合适的\n", "x.grad.zero_()\n", "y = x * x\n", "# 等价于y.backward(torch.ones(len(x)))\n", "print(y)\n", "y.sum().backward()\n", "x.grad" ], "id": "f9207619bd4b3de8", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": "torch.ones(len(x))", "id": "409c14c230570859", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "x.grad.zero_()\n", "y=x*x\n", "u=y.detach()\n", "z=u*x\n", "z.sum().backward()\n", "x.grad==u" ], "id": "521b948fe0683b12", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "x.grad.zero_()\n", "y.sum().backward()\n", "x.grad==2*x" ], "id": "b040beecf0632315", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "from torch.distributions import multinomial\n", "fair_probs=torch.ones([6])\n", "fair_probs" ], "id": "4e6ec763dbea5aa3", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": "multinomial.Multinomial(1, fair_probs).sample()", "id": "f12d5e85bc6ab595", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "counts = multinomial.Multinomial(10, fair_probs).sample((500,))\n", "\n", "cum_counts = counts.cumsum(dim=0)\n", "cum_counts.size()" ], "id": "b02f43376fd6f1fe", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "import matplotlib.pyplot as plt\n", "\n", "# 假设 estimates 是你的数据张量\n", "estimates = cum_counts / cum_counts.sum(dim=1, keepdims=True)\n", "\n", "# 设置图形大小 (等效于 d2l.set_figsize)\n", "plt.figure(figsize=(6, 4.5))\n", "\n", "# 绘制每条概率曲线\n", "for i in range(6):\n", " plt.plot(estimates[:, i].numpy(),\n", " label=f\"P(die={i + 1})\") # 使用 f-string 更简洁\n", "\n", "# 添加理论概率水平线\n", "plt.axhline(y=0.167, color='black', linestyle='dashed', label='Theoretical probability')\n", "\n", "# 设置坐标轴标签\n", "plt.xlabel('Groups of experiments')\n", "plt.ylabel('Estimated probability')\n", "\n", "# 添加图例\n", "plt.legend()\n", "\n", "# 显示图形\n", "plt.show()\n", "#plt.savefig('dice_probability.png', bbox_inches='tight')" ], "id": "8b80daa4edd0b066", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "import numpy as np\n", "class Timer:\n", " \"\"\"记录多次运行时间\"\"\"\n", " def __init__(self):\n", " self.times = []\n", " self.start()\n", " def start(self):\n", " \"\"\"启动计时器\"\"\"\n", " self.tik = time.time()\n", " def stop(self):\n", " \"\"\"停止计时器并将时间记录在列表中\"\"\"\n", " self.times.append(time.time() - self.tik)\n", " return self.times[-1]\n", " def avg(self):\n", " \"\"\"返回平均时间\"\"\"\n", " return sum(self.times) / len(self.times)\n", " def sum(self):\n", " \"\"\"返回时间总和\"\"\"\n", " return sum(self.times)\n", " def cumsum(self):\n", " \"\"\"返回累计时间\"\"\"\n", " return np.array(self.times).cumsum().tolist()" ], "id": "4bdbb4999907154a", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "n = 10000\n", "a = torch.ones([n])\n", "b = torch.ones([n])\n", "c=torch.zeros(n)\n", "timer = Timer()\n", "for i in range(n):\n", " c[i]=a[i]+b[i]\n", "f'{timer.stop():.5f} sec'" ], "id": "c6f71622e2cc578a", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "timer.start()\n", "d=a+b\n", "f'{timer.stop():.5f} sec'" ], "id": "2578c79b1214a79f", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "import math\n", "def normal(x, mu, sigma):\n", " p = 1 / math.sqrt(2 * math.pi * sigma**2)\n", " return p * np.exp(-0.5 / sigma**2 * (x - mu)**2)" ], "id": "fd17fdbe38a5f79", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "from matplotlib_inline import backend_inline\n", "def use_svg_display(): #@save\n", " \"\"\"使用svg格式在Jupyter中显示绘图\"\"\"\n", " backend_inline.set_matplotlib_formats('svg')\n", "def set_figsize(figsize=(3.5, 2.5)): #@save\n", " \"\"\"设置matplotlib的图表大小\"\"\"\n", " use_svg_display()\n", " plt.rcParams['figure.figsize'] = figsize\n", "def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):\n", " \"\"\"设置matplotlib的轴\"\"\"\n", " axes.set_xlabel(xlabel)\n", " axes.set_ylabel(ylabel)\n", " axes.set_xscale(xscale)\n", " axes.set_yscale(yscale)\n", " axes.set_xlim(xlim)\n", " axes.set_ylim(ylim)\n", " if legend:\n", " axes.legend(legend)\n", " axes.grid()\n", "def plot(X, Y=None, xlabel=None, ylabel=None, legend=None, xlim=None,\n", "ylim=None, xscale='linear', yscale='linear',\n", "fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):\n", " \"\"\"绘制数据点\"\"\"\n", " if legend is None:\n", " legend = []\n", " set_figsize(figsize)\n", " axes = axes if axes else plt.gca()\n", " # 如果X有一个轴,输出True\n", " def has_one_axis(X):\n", " return (hasattr(X, \"ndim\") and X.ndim == 1 or isinstance(X, list)\n", "and not hasattr(X[0], \"__len__\"))\n", " if has_one_axis(X):\n", " X = [X]\n", " if Y is None:\n", " X, Y = [[]] * len(X), X\n", " elif has_one_axis(Y):\n", " Y = [Y]\n", " if len(X) != len(Y):\n", " X = X * len(Y)\n", " axes.cla()\n", " for x, y, fmt in zip(X, Y, fmts):\n", " if len(x):\n", " axes.plot(x, y, fmt)\n", " else:\n", " axes.plot(y, fmt)\n", " set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend)" ], "id": "82158a69cba14da0", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "# 再次使用numpy进行可视化\n", "x = np.arange(-7, 7, 0.01)\n", "# 均值和标准差对\n", "params = [(0, 1), (0, 2), (3, 1)]\n", "plot(x, [normal(x, mu, sigma) for mu, sigma in params], xlabel='x',\n", "ylabel='p(x)', figsize=(4.5, 2.5),\n", "legend=[f'mean {mu}, std {sigma}' for mu, sigma in params])" ], "id": "f69ac10ebc3d13d8", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "#注意一下matmul做向量乘上矩阵的时候不用考虑转置的情况\n", "def synthetic_data(w, b, num_examples): #@save\n", " \"\"\"生成y=Xw+b+噪声\"\"\"\n", " X = torch.normal(0, 1, (num_examples, len(w)))\n", " y = torch.matmul(X, w) + b\n", " y += torch.normal(0, 0.01, y.shape)\n", " return X, y.reshape((-1, 1))\n" ], "id": "7ed837bdd2b3a26d", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "true_w = torch.tensor([2, -3.4])\n", "true_b = 4.2\n", "features, labels = synthetic_data(true_w, true_b, 1000)" ], "id": "5ec2e204a6fd5cb2", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "set_figsize()\n", "plt.scatter(features[:, (1)].detach().numpy(), labels.detach().numpy(), 1)" ], "id": "38213d46b3d9900d", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "w=torch.normal(0,0.01,size=(2,1),requires_grad=True)\n", "b=torch.zeros(1,requires_grad=True)\n", "def linreg(X, w, b):\n", " return torch.matmul(X,w)+b\n", "def squared_loss(y_hat,y):\n", " return (y_hat-y.reshape(y_hat.shape))**2/2\n", "def sgd(params,lr,batch_size):\n", " with torch.no_grad():\n", " for param in params:\n", " param-=lr*param.grad/batch_size\n", " param.grad.zero_()\n", "lr = 0.03\n", "num_epochs =20\n", "net = linreg\n", "loss = squared_loss" ], "id": "12166e1bc3ddd695", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "import random\n", "def data_iter(batch_size, features, labels):\n", " num_examples = len(features)\n", " indices = list(range(num_examples))\n", " # 这些样本是随机读取的,没有特定的顺序\n", " random.shuffle(indices)\n", " for i in range(0, num_examples, batch_size):\n", " batch_indices = torch.tensor(\n", " indices[i: min(i + batch_size, num_examples)])\n", " yield features[batch_indices], labels[batch_indices]" ], "id": "f3b7ee9f326bc687", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "batch_size =10\n", "for X,y in data_iter(batch_size, features, labels):\n", " print(X,'\\n',y)\n", " break" ], "id": "f386e12d65afff2e", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "for epoch in range(num_epochs):\n", " for X, y in data_iter(batch_size, features, labels):\n", " l=loss(net(X, w, b), y)\n", " l.sum().backward()\n", " sgd([w,b],lr,batch_size)\n", " with torch.no_grad():\n", " train_l =loss(net(features, w, b), labels)\n", " print(f'epoch {epoch+1}, train loss: {float(train_l.mean()):3f}')" ], "id": "8888ab6adcec36f1", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "print(f'w的估计误差: {true_w - w.reshape(true_w.shape)}')\n", "print(f'b的估计误差: {true_b - b}')" ], "id": "8199439fa7f26309", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "from torch.utils import data\n", "true_w = torch.tensor([2,-3.4])\n", "true_b = 4.2\n", "features,labels=synthetic_data(true_w, true_b, 1000)\n", "def load_array(data_arrays,batch_size,is_train=True):\n", " dataset = data.TensorDataset(*data_arrays)\n", " return data.DataLoader(dataset,batch_size,shuffle=is_train)\n", "batch_size = 10\n", "data_iter = load_array((features,labels),batch_size)" ], "id": "560d537dcbb5a335", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "from torch import nn\n", "net = nn.Sequential(nn.Linear(2, 1))\n", "net[0].weight.data.normal_(0,0.001)\n", "net[0].bias.data.fill_(0)" ], "id": "c54fe059d6fd20de", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "loss = nn.MSELoss()\n", "trainer = torch.optim.SGD(net.parameters(), lr=0.01)\n", "num_epochs = 3\n", "for epoch in range(num_epochs):\n", " for X, y in data_iter:\n", " l = loss(net(X) ,y)\n", " trainer.zero_grad()\n", " l.backward()\n", " trainer.step()\n", " l = loss(net(features), labels)\n", " print(f'epoch {epoch + 1}, loss {l:f}')\n" ], "id": "e8a44851125b7cc6", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "import torchvision\n", "from torchvision import transforms\n", "trans =transforms.ToTensor()\n", "mnist_train = torchvision.datasets.FashionMNIST(root=\"./data\",train=True,transform=trans,download=False)\n", "mnist_test = torchvision.datasets.FashionMNIST(root=\"./data\",train=False,transform=trans,download=False)\n" ], "id": "bd4e8a65ccd03177", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "use_svg_display()\n", "len(mnist_train),len(mnist_test)" ], "id": "ed2c915af7f6a76a", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": "mnist_train[0][0].shape", "id": "4df1cbc292aa5981", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "def get_fashion_mnist_labels(labels):\n", " text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',\n", "'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']\n", " return [text_labels[int(i)] for i in labels]\n" ], "id": "332f3d6da0bafbe8", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "def show_images(imgs, num_rows, num_cols, titles=None, scale=1): #@save\n", " \"\"\"绘制图像列表\"\"\"\n", " figsize = (num_cols * scale, num_rows * scale)\n", " _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)\n", " axes = axes.flatten()\n", " for i, (ax, img) in enumerate(zip(axes, imgs)):\n", " if torch.is_tensor(img):\n", " # 图片张量\n", " ax.imshow(img.numpy())\n", " else:\n", " # PIL图片\n", " ax.imshow(img)\n", " ax.axes.get_xaxis().set_visible(False)\n", " ax.axes.get_yaxis().set_visible(False)\n", " if titles:\n", " ax.set_title(titles[i])\n", " return axes" ], "id": "c83202fe9b0ab487", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "X, y = next(iter(data.DataLoader(mnist_train, batch_size=18)))\n", "print(X.shape)\n", "show_images(X.reshape(18, 28, 28), 2, 9, titles=get_fashion_mnist_labels(y));" ], "id": "cf4abd8370d55416", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "batch_size = 256\n", "def get_dataloader_workers():\n", " \"\"\"使用4个进程来读取数据\"\"\"\n", " return 4\n", "\n", "train_iter = data.DataLoader(mnist_train, batch_size, shuffle=True,\n", "num_workers=get_dataloader_workers())\n", "timer = Timer()\n", "for X, y in train_iter:\n", " continue\n", "f'{timer.stop():.2f} sec'" ], "id": "552769fbffc16142", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "def load_data_fashion_mnist(batch_size, resize=None):\n", " \"\"\"下载Fashion-MNIST数据集,然后将其加载到内存中\"\"\"\n", " trans = [transforms.ToTensor()]\n", " if resize:\n", " trans.insert(0, transforms.Resize(resize))\n", " trans = transforms.Compose(trans)\n", " mnist_train = torchvision.datasets.FashionMNIST(\n", " root=\"./data\", train=True, transform=trans, download=False)\n", " mnist_test = torchvision.datasets.FashionMNIST(\n", " root=\"./data\", train=False, transform=trans, download=False)\n", " return (data.DataLoader(mnist_train, batch_size, shuffle=True,\n", " num_workers=get_dataloader_workers()),\n", " data.DataLoader(mnist_test, batch_size, shuffle=False,\n", " num_workers=get_dataloader_workers()))" ], "id": "aa81880abd86cae6", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "train_iter, test_iter = load_data_fashion_mnist(32, resize=64)\n", "for X, y in train_iter:\n", " print(X.shape, X.dtype, y.shape, y.dtype)\n", " break" ], "id": "4248a103f745154", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "from IPython import display\n", "batch_size = 256\n", "train_iter, test_iter = load_data_fashion_mnist(32)\n", "num_inputs = 784\n", "num_outputs = 10\n", "W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)\n", "b = torch.zeros(num_outputs, requires_grad=True)\n", "X = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])\n", "X.sum(0, keepdim=True), X.sum(1, keepdim=True)\n" ], "id": "94c52f0cca88ef48", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "def softmax(X):\n", " X_exp = torch.exp(X)\n", " partition = X_exp.sum(1, keepdim=True)\n", " return X_exp / partition # 这里应用了广播机制\n", "X = torch.normal(0, 1, (2, 5))\n", "X_prob = softmax(X)\n", "X_prob, X_prob.sum(1)" ], "id": "c4ab34373c5a664e", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "def net(X):\n", " return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)" ], "id": "6eacc53b2b9738af", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "y = torch.tensor([0, 2])\n", "y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])\n", "y_hat[[0, 1], y]" ], "id": "698449b4dafb545c", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "def cross_entropy(y_hat, y):\n", " return - torch.log(y_hat[range(len(y_hat)), y])\n", "cross_entropy(y_hat, y)" ], "id": "1720369fc8568c8c", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "def accuracy(y_hat, y): #@save\n", " \"\"\"计算预测正确的数量\"\"\"\n", " if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:\n", " y_hat = y_hat.argmax(axis=1)\n", " cmp = y_hat.type(y.dtype) == y\n", " return float(cmp.type(y.dtype).sum())\n", "\n", "accuracy(y_hat, y)/len(y)" ], "id": "e65719500a64ed87", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "class Accumulator: #@save\n", " \"\"\"在n个变量上累加\"\"\"\n", " def __init__(self, n):\n", " self.data = [0.0] * n\n", " def add(self, *args):\n", " self.data = [a + float(b) for a, b in zip(self.data, args)]\n", " def reset(self):\n", " self.data = [0.0] * len(self.data)\n", " def __getitem__(self, idx):\n", " return self.data[idx]\n" ], "id": "f1eebb35ff2e9fea", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "def evaluate_accuracy(net, data_iter): #@save\n", " \"\"\"计算在指定数据集上模型的精度\"\"\"\n", " if isinstance(net, torch.nn.Module):\n", " net.eval() # 将模型设置为评估模式\n", " metric = Accumulator(2) # 正确预测数、预测总数\n", " with torch.no_grad():\n", " for X, y in data_iter:\n", " metric.add(accuracy(net(X), y), y.numel())\n", " return metric[0] / metric[1]\n" ], "id": "bc2beb5f2d6afe7e", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": "evaluate_accuracy(net, test_iter)", "id": "65bcfb7e40c1a98b", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "def train_epoch_ch3(net, train_iter, loss, updater): #@save\n", " \"\"\"训练模型一个迭代周期(定义见第3章)\"\"\"\n", " # 将模型设置为训练模式\n", " if isinstance(net, torch.nn.Module):\n", " net.train()\n", " # 训练损失总和、训练准确度总和、样本数\n", " metric = Accumulator(3)\n", " for X, y in train_iter:\n", " # 计算梯度并更新参数\n", " y_hat = net(X)\n", " l = loss(y_hat, y)\n", " if isinstance(updater, torch.optim.Optimizer):\n", " # 使用PyTorch内置的优化器和损失函数\n", " updater.zero_grad()\n", " l.mean().backward()\n", " updater.step()\n", " else:\n", " # 使用定制的优化器和损失函数\n", " l.sum().backward()\n", " updater(X.shape[0])\n", " metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())\n", "# 返回训练损失和训练精度\n", " return metric[0] / metric[2], metric[1] / metric[2]" ], "id": "2faf1dcc6c023a53", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "class Animator: #@save\n", " \"\"\"在动画中绘制数据\"\"\"\n", " def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,\n", " ylim=None, xscale='linear', yscale='linear',\n", " fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,\n", " figsize=(3.5, 2.5)):\n", " # 增量地绘制多条线\n", " if legend is None:\n", " legend = []\n", " use_svg_display()\n", " self.fig, self.axes = plt.subplots(nrows, ncols, figsize=figsize)\n", " if nrows * ncols == 1:\n", " self.axes = [self.axes, ]\n", " # 使用lambda函数捕获参数\n", " self.config_axes = lambda: set_axes(\n", " self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)\n", " self.X, self.Y, self.fmts = None, None, fmts\n", " def add(self, x, y):\n", " # 向图表中添加多个数据点\n", " if not hasattr(y, \"__len__\"):\n", " y = [y]\n", " n = len(y)\n", " if not hasattr(x, \"__len__\"):\n", " x = [x] * n\n", " if not self.X:\n", " self.X = [[] for _ in range(n)]\n", " if not self.Y:\n", " self.Y = [[] for _ in range(n)]\n", " for i, (a, b) in enumerate(zip(x, y)):\n", " if a is not None and b is not None:\n", " self.X[i].append(a)\n", " self.Y[i].append(b)\n", " self.axes[0].cla()\n", " for x, y, fmt in zip(self.X, self.Y, self.fmts):\n", " self.axes[0].plot(x, y, fmt)\n", " self.config_axes()\n", " display.display(self.fig)\n", " display.clear_output(wait=True)\n" ], "id": "7cd5367ab43c5e5f", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater): #@save\n", " \"\"\"训练模型(定义见第3章)\"\"\"\n", " animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],\n", " legend=['train loss', 'train acc', 'test acc'])\n", " for epoch in range(num_epochs):\n", " train_metrics = train_epoch_ch3(net, train_iter, loss, updater)\n", " test_acc = evaluate_accuracy(net, test_iter)\n", " animator.add(epoch + 1, train_metrics + (test_acc,))\n", " train_loss, train_acc = train_metrics\n", " assert train_loss < 0.5, train_loss\n", " assert train_acc <= 1 and train_acc > 0.7, train_acc\n", " assert test_acc <= 1 and test_acc > 0.7, test_acc" ], "id": "b02a143c75fad40", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "lr = 0.1\n", "def updater(batch_size):\n", " return sgd([W, b], lr, batch_size)" ], "id": "6a97b70779276b61", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "num_epochs = 10\n", "#train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)" ], "id": "df3cceb72faee402", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "def predict_ch3(net, test_iter, n=6): #@save\n", " \"\"\"预测标签(定义见第3章)\"\"\"\n", " for (X, y),i in zip(test_iter,range(1)):\n", " trues = get_fashion_mnist_labels(y)\n", " preds = get_fashion_mnist_labels(net(X).argmax(axis=1))\n", " titles = [true +'\\n' + pred for true, pred in zip(trues, preds)]\n", " show_images(\n", " X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])\n", "\n", "predict_ch3(net, test_iter)\n" ], "id": "94f6177bfb40eece", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "batch_size = 256\n", "train_iter, test_iter = load_data_fashion_mnist(batch_size)" ], "id": "4a0bfd0479ec7386", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "net = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))\n", "def init_weights(m):\n", " if type(m) == nn.Linear:\n", " nn.init.normal_(m.weight, std=0.01)\n", "\n", "net.apply(init_weights);\n", "loss = nn.CrossEntropyLoss(reduction='none')\n", "trainer = torch.optim.SGD(net.parameters(), lr=0.1)\n", "num_epochs = 10\n", "train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)" ], "id": "b9808d88f5e6827b", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "x = torch.arange(-8.0, 8.0, 0.1, requires_grad=True)\n", "y = torch.relu(x)\n", "plot(x.detach(), y.detach(), 'x', 'relu(x)', figsize=(5, 2.5))" ], "id": "c25dd146307f58e0", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "y.backward(torch.ones_like(x), retain_graph=True)\n", "plot(x.detach(), x.grad, 'x', 'grad of relu', figsize=(5, 2.5))" ], "id": "f96acd2015dccb38", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "y = torch.sigmoid(x)\n", "plot(x.detach(), y.detach(), 'x', 'sigmoid(x)', figsize=(5, 2.5))" ], "id": "74013cea59cd8be3", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "x.grad.data.zero_()\n", "y.backward(torch.ones_like(x),retain_graph=True)\n", "plot(x.detach(), x.grad, 'x', 'grad of sigmoid', figsize=(5, 2.5))" ], "id": "6a0b4f529bf9cc5c", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "batch_size = 256\n", "train_iter, test_iter = load_data_fashion_mnist(batch_size)" ], "id": "f1de998439b1b9f", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "num_inputs,num_outputs,num_hiddens = 784, 10, 256\n", "W1=nn.Parameter(torch.randn(num_inputs,num_hiddens,requires_grad=True)*0.01)\n", "b1=nn.Parameter(torch.zeros(num_hiddens,requires_grad=True))\n", "W2 = nn.Parameter(torch.randn(num_hiddens,num_outputs,requires_grad=True)*0.01)\n", "b2=nn.Parameter(torch.zeros(num_outputs,requires_grad=True))\n", "params=[W1,b1,W2,b2]\n" ], "id": "adcea8cd4ee792a8", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "def relu(X):\n", " a = torch.zeros_like(X)\n", " return torch.max(X,a)\n", "def net(X):\n", " X = X.reshape((-1,num_inputs))\n", " H = relu(X@W1+b1)\n", " return (H@W2+b2)\n", "loss = nn.CrossEntropyLoss(reduction='none')\n", "num_epochs,lr=10,0.05\n", "updater=torch.optim.SGD(params,lr=lr)\n", "#train_ch3(net,train_iter,test_iter,loss,num_epochs,updater)\n" ], "id": "cfd81a0f16d0c573", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": "predict_ch3(net, test_iter)", "id": "f2ed2e9cee14c28a", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "net = nn.Sequential(nn.Flatten(),\n", " nn.Linear(784,256),\n", " nn.ReLU(),\n", " nn.Linear(256,10))\n", "def init_weights(m):\n", " if type(m) == nn.Linear:\n", " nn.init.normal_(m.weight,std=0.01)\n", "\n", "net.apply(init_weights)\n" ], "id": "9e4ed6d103380bc7", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": [ "batch_size,lr, num_epochs=256,0.1,10\n", "loss = nn.CrossEntropyLoss(reduction='none')\n", "trainer = torch.optim.SGD(net.parameters(),lr=lr)\n", "train_iter, test_iter = load_data_fashion_mnist(batch_size)\n", "#train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)" ], "id": "52d71c77c4f51e90", "outputs": [], "execution_count": null }, { "metadata": {}, "cell_type": "code", "source": "", "id": "94706c936b4be3e1", "outputs": [], "execution_count": null } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }