{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2026-03-19T05:45:23.532371843Z",
"start_time": "2026-03-19T05:45:23.227305138Z"
}
},
"source": [
"import torch\n",
"import numpy\n",
"import pandas"
],
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "torch.randn(3,4,2)",
"id": "3e141a42d342fa96",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"X = torch.arange(12, dtype=torch.float32).reshape((3,4))\n",
"Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])\n",
"torch.cat((X, Y), dim=0), torch.cat((X, Y), dim=1)"
],
"id": "8ae20ae68abbf32f",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"a = torch.arange(3).reshape((3, 1))\n",
"b = torch.arange(2).reshape((1, 2))\n",
"a, b\n",
"a+b"
],
"id": "2960a1ded2cdd5a4",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "X[-1], X[1:3]\n",
"id": "69c2ec23ab6ae97c",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"A = X.numpy()\n",
"B = torch.tensor(A)\n",
"type(A), type(B)"
],
"id": "b8d779a1bc7e4b1a",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import os\n",
"os.makedirs(os.path.join(\"..\",\"data\"),exist_ok=True)\n",
"data_file = os.path.join(\"..\",\"data\",\"data.csv\")\n",
"with open(data_file, \"w\") as f:\n",
" f.write('NumRooms,Alley,Price\\n') # 列名\n",
" f.write('NA,Pave,127500\\n') # 每行表示一个数据样本\n",
" f.write('2,NA,106000\\n')\n",
" f.write('4,NA,178100\\n')\n",
" f.write('NA,NA,140000\\n')\n",
"\n"
],
"id": "82be028b0f1dd1e3",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"data = pd.read_csv(data_file)\n",
"print(data)\n"
],
"id": "ddd789a2656899d1",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]\n",
"\n",
"\n",
"inputs = pd.get_dummies(inputs, dummy_na=True)\n",
"print(inputs)\n",
"inputs = inputs.fillna(inputs.mean())\n",
"print(inputs)\n"
],
"id": "e98fcc3bd4f067cf",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"X = torch.tensor(inputs.to_numpy(dtype=float))\n",
"y = torch.tensor(outputs.to_numpy(dtype=float))\n",
"X, y\n"
],
"id": "8ff0f7b40f0e4996",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"B=torch.tensor([[1,2,3],[2,0,4],[3,4,5]])\n",
"B"
],
"id": "91a6e0da442b95a0",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "B==B.T",
"id": "297e6a678fb19be7",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"X=torch.arange(24).reshape(2,3,4)\n",
"X"
],
"id": "24e864b336beb58b",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"A = torch.arange(20, dtype=torch.float32).reshape(5, 4)\n",
"B = A.clone() # 通过分配新内存将A的一个副本分配给B\n",
"A, A + B\n",
"#A = torch.arange(20, dtype=torch.float32).reshape(5, 4)\n",
"#B = A # 通过分配新内存将A的一个副本分配给B\n",
"id(A),id(B)"
],
"id": "ee0905479b1dbc2b",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Hadamard乘积",
"id": "136459f5efe765cf"
},
{
"metadata": {},
"cell_type": "code",
"source": "A*B",
"id": "f576b0df17cc0e98",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"a=2\n",
"X=torch.arange(24).reshape(2,3,4)\n",
"a+X,(a*X).shape"
],
"id": "b2373af1d7f2a45",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"print(A)\n",
"A_sum_axis0=A.sum(axis=0)\n",
"A_sum_axis1=A.sum(axis=1)\n",
"A_sum_axis0,A_sum_axis1"
],
"id": "2b50246e1ca8a3bc",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x=torch.arange(4,dtype=torch.float32)\n",
"torch.mv(A,x)"
],
"id": "3195464dfeb554ed",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import time\n",
"\n",
"def showtime(func):\n",
" def wrapper():\n",
" start = time.time()\n",
" result = func() # 执行原始函数\n",
" end = time.time()\n",
" print(f\"执行时间: {end - start:.6f}秒\")\n",
" return result\n",
" return wrapper # 返回包装函数\n",
"\n",
"@showtime\n",
"def fun():\n",
" print(\"I am silly\")\n",
"\n",
"fun()\n"
],
"id": "ebda8c74ead3e42b",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "torch.norm(torch.ones((4, 9)))",
"id": "3343cc0c01d0161c",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x =torch.arange(4.0,requires_grad=True)\n",
"x.grad"
],
"id": "674e2416e9417cfe",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"y=2*torch.dot(x,x)\n",
"y"
],
"id": "66c0febebcf98cde",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"y.backward()\n",
"x.grad"
],
"id": "825f2ce6c46ca4a8",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x.grad.zero_()\n",
"y = x.sum()\n",
"y.backward()\n",
"x.grad\n"
],
"id": "df399463515e9d3c",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# 对非标量调用backward需要传入一个gradient参数该参数指定微分函数关于self的梯度。\n",
"# 本例只想求偏导数的和所以传递一个1的梯度是合适的\n",
"x.grad.zero_()\n",
"y = x * x\n",
"# 等价于y.backward(torch.ones(len(x)))\n",
"print(y)\n",
"y.sum().backward()\n",
"x.grad"
],
"id": "f9207619bd4b3de8",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "torch.ones(len(x))",
"id": "409c14c230570859",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x.grad.zero_()\n",
"y=x*x\n",
"u=y.detach()\n",
"z=u*x\n",
"z.sum().backward()\n",
"x.grad==u"
],
"id": "521b948fe0683b12",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x.grad.zero_()\n",
"y.sum().backward()\n",
"x.grad==2*x"
],
"id": "b040beecf0632315",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"from torch.distributions import multinomial\n",
"fair_probs=torch.ones([6])\n",
"fair_probs"
],
"id": "4e6ec763dbea5aa3",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "multinomial.Multinomial(1, fair_probs).sample()",
"id": "f12d5e85bc6ab595",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"counts = multinomial.Multinomial(10, fair_probs).sample((500,))\n",
"\n",
"cum_counts = counts.cumsum(dim=0)\n",
"cum_counts.size()"
],
"id": "b02f43376fd6f1fe",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# 假设 estimates 是你的数据张量\n",
"estimates = cum_counts / cum_counts.sum(dim=1, keepdims=True)\n",
"\n",
"# 设置图形大小 (等效于 d2l.set_figsize)\n",
"plt.figure(figsize=(6, 4.5))\n",
"\n",
"# 绘制每条概率曲线\n",
"for i in range(6):\n",
" plt.plot(estimates[:, i].numpy(),\n",
" label=f\"P(die={i + 1})\") # 使用 f-string 更简洁\n",
"\n",
"# 添加理论概率水平线\n",
"plt.axhline(y=0.167, color='black', linestyle='dashed', label='Theoretical probability')\n",
"\n",
"# 设置坐标轴标签\n",
"plt.xlabel('Groups of experiments')\n",
"plt.ylabel('Estimated probability')\n",
"\n",
"# 添加图例\n",
"plt.legend()\n",
"\n",
"# 显示图形\n",
"plt.show()\n",
"#plt.savefig('dice_probability.png', bbox_inches='tight')"
],
"id": "8b80daa4edd0b066",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import numpy as np\n",
"class Timer:\n",
" \"\"\"记录多次运行时间\"\"\"\n",
" def __init__(self):\n",
" self.times = []\n",
" self.start()\n",
" def start(self):\n",
" \"\"\"启动计时器\"\"\"\n",
" self.tik = time.time()\n",
" def stop(self):\n",
" \"\"\"停止计时器并将时间记录在列表中\"\"\"\n",
" self.times.append(time.time() - self.tik)\n",
" return self.times[-1]\n",
" def avg(self):\n",
" \"\"\"返回平均时间\"\"\"\n",
" return sum(self.times) / len(self.times)\n",
" def sum(self):\n",
" \"\"\"返回时间总和\"\"\"\n",
" return sum(self.times)\n",
" def cumsum(self):\n",
" \"\"\"返回累计时间\"\"\"\n",
" return np.array(self.times).cumsum().tolist()"
],
"id": "4bdbb4999907154a",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"n = 10000\n",
"a = torch.ones([n])\n",
"b = torch.ones([n])\n",
"c=torch.zeros(n)\n",
"timer = Timer()\n",
"for i in range(n):\n",
" c[i]=a[i]+b[i]\n",
"f'{timer.stop():.5f} sec'"
],
"id": "c6f71622e2cc578a",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"timer.start()\n",
"d=a+b\n",
"f'{timer.stop():.5f} sec'"
],
"id": "2578c79b1214a79f",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import math\n",
"def normal(x, mu, sigma):\n",
" p = 1 / math.sqrt(2 * math.pi * sigma**2)\n",
" return p * np.exp(-0.5 / sigma**2 * (x - mu)**2)"
],
"id": "fd17fdbe38a5f79",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"from matplotlib_inline import backend_inline\n",
"def use_svg_display(): #@save\n",
" \"\"\"使用svg格式在Jupyter中显示绘图\"\"\"\n",
" backend_inline.set_matplotlib_formats('svg')\n",
"def set_figsize(figsize=(3.5, 2.5)): #@save\n",
" \"\"\"设置matplotlib的图表大小\"\"\"\n",
" use_svg_display()\n",
" plt.rcParams['figure.figsize'] = figsize\n",
"def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):\n",
" \"\"\"设置matplotlib的轴\"\"\"\n",
" axes.set_xlabel(xlabel)\n",
" axes.set_ylabel(ylabel)\n",
" axes.set_xscale(xscale)\n",
" axes.set_yscale(yscale)\n",
" axes.set_xlim(xlim)\n",
" axes.set_ylim(ylim)\n",
" if legend:\n",
" axes.legend(legend)\n",
" axes.grid()\n",
"def plot(X, Y=None, xlabel=None, ylabel=None, legend=None, xlim=None,\n",
"ylim=None, xscale='linear', yscale='linear',\n",
"fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):\n",
" \"\"\"绘制数据点\"\"\"\n",
" if legend is None:\n",
" legend = []\n",
" set_figsize(figsize)\n",
" axes = axes if axes else plt.gca()\n",
" # 如果X有一个轴输出True\n",
" def has_one_axis(X):\n",
" return (hasattr(X, \"ndim\") and X.ndim == 1 or isinstance(X, list)\n",
"and not hasattr(X[0], \"__len__\"))\n",
" if has_one_axis(X):\n",
" X = [X]\n",
" if Y is None:\n",
" X, Y = [[]] * len(X), X\n",
" elif has_one_axis(Y):\n",
" Y = [Y]\n",
" if len(X) != len(Y):\n",
" X = X * len(Y)\n",
" axes.cla()\n",
" for x, y, fmt in zip(X, Y, fmts):\n",
" if len(x):\n",
" axes.plot(x, y, fmt)\n",
" else:\n",
" axes.plot(y, fmt)\n",
" set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend)"
],
"id": "82158a69cba14da0",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# 再次使用numpy进行可视化\n",
"x = np.arange(-7, 7, 0.01)\n",
"# 均值和标准差对\n",
"params = [(0, 1), (0, 2), (3, 1)]\n",
"plot(x, [normal(x, mu, sigma) for mu, sigma in params], xlabel='x',\n",
"ylabel='p(x)', figsize=(4.5, 2.5),\n",
"legend=[f'mean {mu}, std {sigma}' for mu, sigma in params])"
],
"id": "f69ac10ebc3d13d8",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"#注意一下matmul做向量乘上矩阵的时候不用考虑转置的情况\n",
"def synthetic_data(w, b, num_examples): #@save\n",
" \"\"\"生成y=Xw+b+噪声\"\"\"\n",
" X = torch.normal(0, 1, (num_examples, len(w)))\n",
" y = torch.matmul(X, w) + b\n",
" y += torch.normal(0, 0.01, y.shape)\n",
" return X, y.reshape((-1, 1))\n"
],
"id": "7ed837bdd2b3a26d",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"true_w = torch.tensor([2, -3.4])\n",
"true_b = 4.2\n",
"features, labels = synthetic_data(true_w, true_b, 1000)"
],
"id": "5ec2e204a6fd5cb2",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"set_figsize()\n",
"plt.scatter(features[:, (1)].detach().numpy(), labels.detach().numpy(), 1)"
],
"id": "38213d46b3d9900d",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"w=torch.normal(0,0.01,size=(2,1),requires_grad=True)\n",
"b=torch.zeros(1,requires_grad=True)\n",
"def linreg(X, w, b):\n",
" return torch.matmul(X,w)+b\n",
"def squared_loss(y_hat,y):\n",
" return (y_hat-y.reshape(y_hat.shape))**2/2\n",
"def sgd(params,lr,batch_size):\n",
" with torch.no_grad():\n",
" for param in params:\n",
" param-=lr*param.grad/batch_size\n",
" param.grad.zero_()\n",
"lr = 0.03\n",
"num_epochs =20\n",
"net = linreg\n",
"loss = squared_loss"
],
"id": "12166e1bc3ddd695",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import random\n",
"def data_iter(batch_size, features, labels):\n",
" num_examples = len(features)\n",
" indices = list(range(num_examples))\n",
" # 这些样本是随机读取的,没有特定的顺序\n",
" random.shuffle(indices)\n",
" for i in range(0, num_examples, batch_size):\n",
" batch_indices = torch.tensor(\n",
" indices[i: min(i + batch_size, num_examples)])\n",
" yield features[batch_indices], labels[batch_indices]"
],
"id": "f3b7ee9f326bc687",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"batch_size =10\n",
"for X,y in data_iter(batch_size, features, labels):\n",
" print(X,'\\n',y)\n",
" break"
],
"id": "f386e12d65afff2e",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"for epoch in range(num_epochs):\n",
" for X, y in data_iter(batch_size, features, labels):\n",
" l=loss(net(X, w, b), y)\n",
" l.sum().backward()\n",
" sgd([w,b],lr,batch_size)\n",
" with torch.no_grad():\n",
" train_l =loss(net(features, w, b), labels)\n",
" print(f'epoch {epoch+1}, train loss: {float(train_l.mean()):.3f}')"
],
"id": "8888ab6adcec36f1",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"print(f'w的估计误差: {true_w - w.reshape(true_w.shape)}')\n",
"print(f'b的估计误差: {true_b - b}')"
],
"id": "8199439fa7f26309",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"from torch.utils import data\n",
"true_w = torch.tensor([2,-3.4])\n",
"true_b = 4.2\n",
"features,labels=synthetic_data(true_w, true_b, 1000)\n",
"def load_array(data_arrays,batch_size,is_train=True):\n",
" dataset = data.TensorDataset(*data_arrays)\n",
" return data.DataLoader(dataset,batch_size,shuffle=is_train)\n",
"batch_size = 10\n",
"data_iter = load_array((features,labels),batch_size)"
],
"id": "560d537dcbb5a335",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"from torch import nn\n",
"net = nn.Sequential(nn.Linear(2, 1))\n",
"net[0].weight.data.normal_(0,0.001)\n",
"net[0].bias.data.fill_(0)"
],
"id": "c54fe059d6fd20de",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"loss = nn.MSELoss()\n",
"trainer = torch.optim.SGD(net.parameters(), lr=0.01)\n",
"num_epochs = 3\n",
"for epoch in range(num_epochs):\n",
" for X, y in data_iter:\n",
" l = loss(net(X) ,y)\n",
" trainer.zero_grad()\n",
" l.backward()\n",
" trainer.step()\n",
" l = loss(net(features), labels)\n",
" print(f'epoch {epoch + 1}, loss {l:f}')\n"
],
"id": "e8a44851125b7cc6",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"import torchvision\n",
"from torchvision import transforms\n",
"trans =transforms.ToTensor()\n",
"mnist_train = torchvision.datasets.FashionMNIST(root=\"./data\",train=True,transform=trans,download=False)\n",
"mnist_test = torchvision.datasets.FashionMNIST(root=\"./data\",train=False,transform=trans,download=False)\n"
],
"id": "bd4e8a65ccd03177",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"use_svg_display()\n",
"len(mnist_train),len(mnist_test)"
],
"id": "ed2c915af7f6a76a",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "mnist_train[0][0].shape",
"id": "4df1cbc292aa5981",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def get_fashion_mnist_labels(labels):\n",
" text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',\n",
"'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']\n",
" return [text_labels[int(i)] for i in labels]\n"
],
"id": "332f3d6da0bafbe8",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def show_images(imgs, num_rows, num_cols, titles=None, scale=1): #@save\n",
" \"\"\"绘制图像列表\"\"\"\n",
" figsize = (num_cols * scale, num_rows * scale)\n",
" _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)\n",
" axes = axes.flatten()\n",
" for i, (ax, img) in enumerate(zip(axes, imgs)):\n",
" if torch.is_tensor(img):\n",
" # 图片张量\n",
" ax.imshow(img.numpy())\n",
" else:\n",
" # PIL图片\n",
" ax.imshow(img)\n",
" ax.axes.get_xaxis().set_visible(False)\n",
" ax.axes.get_yaxis().set_visible(False)\n",
" if titles:\n",
" ax.set_title(titles[i])\n",
" return axes"
],
"id": "c83202fe9b0ab487",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"X, y = next(iter(data.DataLoader(mnist_train, batch_size=18)))\n",
"print(X.shape)\n",
"show_images(X.reshape(18, 28, 28), 2, 9, titles=get_fashion_mnist_labels(y));"
],
"id": "cf4abd8370d55416",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"batch_size = 256\n",
"def get_dataloader_workers():\n",
" \"\"\"使用4个进程来读取数据\"\"\"\n",
" return 4\n",
"\n",
"train_iter = data.DataLoader(mnist_train, batch_size, shuffle=True,\n",
"num_workers=get_dataloader_workers())\n",
"timer = Timer()\n",
"for X, y in train_iter:\n",
" continue\n",
"f'{timer.stop():.2f} sec'"
],
"id": "552769fbffc16142",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def load_data_fashion_mnist(batch_size, resize=None):\n",
" \"\"\"下载Fashion-MNIST数据集然后将其加载到内存中\"\"\"\n",
" trans = [transforms.ToTensor()]\n",
" if resize:\n",
" trans.insert(0, transforms.Resize(resize))\n",
" trans = transforms.Compose(trans)\n",
" mnist_train = torchvision.datasets.FashionMNIST(\n",
" root=\"./data\", train=True, transform=trans, download=False)\n",
" mnist_test = torchvision.datasets.FashionMNIST(\n",
" root=\"./data\", train=False, transform=trans, download=False)\n",
" return (data.DataLoader(mnist_train, batch_size, shuffle=True,\n",
" num_workers=get_dataloader_workers()),\n",
" data.DataLoader(mnist_test, batch_size, shuffle=False,\n",
" num_workers=get_dataloader_workers()))"
],
"id": "aa81880abd86cae6",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"train_iter, test_iter = load_data_fashion_mnist(32, resize=64)\n",
"for X, y in train_iter:\n",
" print(X.shape, X.dtype, y.shape, y.dtype)\n",
" break"
],
"id": "4248a103f745154",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"from IPython import display\n",
"batch_size = 256\n",
"train_iter, test_iter = load_data_fashion_mnist(batch_size)\n",
"num_inputs = 784\n",
"num_outputs = 10\n",
"W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)\n",
"b = torch.zeros(num_outputs, requires_grad=True)\n",
"X = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])\n",
"X.sum(0, keepdim=True), X.sum(1, keepdim=True)\n"
],
"id": "94c52f0cca88ef48",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def softmax(X):\n",
" X_exp = torch.exp(X)\n",
" partition = X_exp.sum(1, keepdim=True)\n",
" return X_exp / partition # 这里应用了广播机制\n",
"X = torch.normal(0, 1, (2, 5))\n",
"X_prob = softmax(X)\n",
"X_prob, X_prob.sum(1)"
],
"id": "c4ab34373c5a664e",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def net(X):\n",
" return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)"
],
"id": "6eacc53b2b9738af",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"y = torch.tensor([0, 2])\n",
"y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])\n",
"y_hat[[0, 1], y]"
],
"id": "698449b4dafb545c",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def cross_entropy(y_hat, y):\n",
" return - torch.log(y_hat[range(len(y_hat)), y])\n",
"cross_entropy(y_hat, y)"
],
"id": "1720369fc8568c8c",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def accuracy(y_hat, y): #@save\n",
" \"\"\"计算预测正确的数量\"\"\"\n",
" if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:\n",
" y_hat = y_hat.argmax(axis=1)\n",
" cmp = y_hat.type(y.dtype) == y\n",
" return float(cmp.type(y.dtype).sum())\n",
"\n",
"accuracy(y_hat, y)/len(y)"
],
"id": "e65719500a64ed87",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"class Accumulator: #@save\n",
" \"\"\"在n个变量上累加\"\"\"\n",
" def __init__(self, n):\n",
" self.data = [0.0] * n\n",
" def add(self, *args):\n",
" self.data = [a + float(b) for a, b in zip(self.data, args)]\n",
" def reset(self):\n",
" self.data = [0.0] * len(self.data)\n",
" def __getitem__(self, idx):\n",
" return self.data[idx]\n"
],
"id": "f1eebb35ff2e9fea",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def evaluate_accuracy(net, data_iter): #@save\n",
" \"\"\"计算在指定数据集上模型的精度\"\"\"\n",
" if isinstance(net, torch.nn.Module):\n",
" net.eval() # 将模型设置为评估模式\n",
" metric = Accumulator(2) # 正确预测数、预测总数\n",
" with torch.no_grad():\n",
" for X, y in data_iter:\n",
" metric.add(accuracy(net(X), y), y.numel())\n",
" return metric[0] / metric[1]\n"
],
"id": "bc2beb5f2d6afe7e",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "evaluate_accuracy(net, test_iter)",
"id": "65bcfb7e40c1a98b",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def train_epoch_ch3(net, train_iter, loss, updater): #@save\n",
" \"\"\"训练模型一个迭代周期定义见第3章\"\"\"\n",
" # 将模型设置为训练模式\n",
" if isinstance(net, torch.nn.Module):\n",
" net.train()\n",
" # 训练损失总和、训练准确度总和、样本数\n",
" metric = Accumulator(3)\n",
" for X, y in train_iter:\n",
" # 计算梯度并更新参数\n",
" y_hat = net(X)\n",
" l = loss(y_hat, y)\n",
" if isinstance(updater, torch.optim.Optimizer):\n",
" # 使用PyTorch内置的优化器和损失函数\n",
" updater.zero_grad()\n",
" l.mean().backward()\n",
" updater.step()\n",
" else:\n",
" # 使用定制的优化器和损失函数\n",
" l.sum().backward()\n",
" updater(X.shape[0])\n",
" metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())\n",
"# 返回训练损失和训练精度\n",
" return metric[0] / metric[2], metric[1] / metric[2]"
],
"id": "2faf1dcc6c023a53",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"class Animator: #@save\n",
" \"\"\"在动画中绘制数据\"\"\"\n",
" def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,\n",
" ylim=None, xscale='linear', yscale='linear',\n",
" fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,\n",
" figsize=(3.5, 2.5)):\n",
" # 增量地绘制多条线\n",
" if legend is None:\n",
" legend = []\n",
" use_svg_display()\n",
" self.fig, self.axes = plt.subplots(nrows, ncols, figsize=figsize)\n",
" if nrows * ncols == 1:\n",
" self.axes = [self.axes, ]\n",
" # 使用lambda函数捕获参数\n",
" self.config_axes = lambda: set_axes(\n",
" self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)\n",
" self.X, self.Y, self.fmts = None, None, fmts\n",
" def add(self, x, y):\n",
" # 向图表中添加多个数据点\n",
" if not hasattr(y, \"__len__\"):\n",
" y = [y]\n",
" n = len(y)\n",
" if not hasattr(x, \"__len__\"):\n",
" x = [x] * n\n",
" if not self.X:\n",
" self.X = [[] for _ in range(n)]\n",
" if not self.Y:\n",
" self.Y = [[] for _ in range(n)]\n",
" for i, (a, b) in enumerate(zip(x, y)):\n",
" if a is not None and b is not None:\n",
" self.X[i].append(a)\n",
" self.Y[i].append(b)\n",
" self.axes[0].cla()\n",
" for x, y, fmt in zip(self.X, self.Y, self.fmts):\n",
" self.axes[0].plot(x, y, fmt)\n",
" self.config_axes()\n",
" display.display(self.fig)\n",
" display.clear_output(wait=True)\n"
],
"id": "7cd5367ab43c5e5f",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater): #@save\n",
" \"\"\"训练模型定义见第3章\"\"\"\n",
" animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],\n",
" legend=['train loss', 'train acc', 'test acc'])\n",
" for epoch in range(num_epochs):\n",
" train_metrics = train_epoch_ch3(net, train_iter, loss, updater)\n",
" test_acc = evaluate_accuracy(net, test_iter)\n",
" animator.add(epoch + 1, train_metrics + (test_acc,))\n",
" train_loss, train_acc = train_metrics\n",
" assert train_loss < 0.5, train_loss\n",
" assert train_acc <= 1 and train_acc > 0.7, train_acc\n",
" assert test_acc <= 1 and test_acc > 0.7, test_acc"
],
"id": "b02a143c75fad40",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"lr = 0.1\n",
"def updater(batch_size):\n",
" return sgd([W, b], lr, batch_size)"
],
"id": "6a97b70779276b61",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"num_epochs = 10\n",
"#train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)"
],
"id": "df3cceb72faee402",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def predict_ch3(net, test_iter, n=6): #@save\n",
" \"\"\"预测标签定义见第3章\"\"\"\n",
" for (X, y),i in zip(test_iter,range(1)):\n",
" trues = get_fashion_mnist_labels(y)\n",
" preds = get_fashion_mnist_labels(net(X).argmax(axis=1))\n",
" titles = [true +'\\n' + pred for true, pred in zip(trues, preds)]\n",
" show_images(\n",
" X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])\n",
"\n",
"predict_ch3(net, test_iter)\n"
],
"id": "94f6177bfb40eece",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"batch_size = 256\n",
"train_iter, test_iter = load_data_fashion_mnist(batch_size)"
],
"id": "4a0bfd0479ec7386",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"net = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))\n",
"def init_weights(m):\n",
" if type(m) == nn.Linear:\n",
" nn.init.normal_(m.weight, std=0.01)\n",
"\n",
"net.apply(init_weights);\n",
"loss = nn.CrossEntropyLoss(reduction='none')\n",
"trainer = torch.optim.SGD(net.parameters(), lr=0.1)\n",
"num_epochs = 10\n",
"train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)"
],
"id": "b9808d88f5e6827b",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x = torch.arange(-8.0, 8.0, 0.1, requires_grad=True)\n",
"y = torch.relu(x)\n",
"plot(x.detach(), y.detach(), 'x', 'relu(x)', figsize=(5, 2.5))"
],
"id": "c25dd146307f58e0",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"y.backward(torch.ones_like(x), retain_graph=True)\n",
"plot(x.detach(), x.grad, 'x', 'grad of relu', figsize=(5, 2.5))"
],
"id": "f96acd2015dccb38",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"y = torch.sigmoid(x)\n",
"plot(x.detach(), y.detach(), 'x', 'sigmoid(x)', figsize=(5, 2.5))"
],
"id": "74013cea59cd8be3",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"x.grad.data.zero_()\n",
"y.backward(torch.ones_like(x),retain_graph=True)\n",
"plot(x.detach(), x.grad, 'x', 'grad of sigmoid', figsize=(5, 2.5))"
],
"id": "6a0b4f529bf9cc5c",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"batch_size = 256\n",
"train_iter, test_iter = load_data_fashion_mnist(batch_size)"
],
"id": "f1de998439b1b9f",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"num_inputs,num_outputs,num_hiddens = 784, 10, 256\n",
"W1=nn.Parameter(torch.randn(num_inputs,num_hiddens,requires_grad=True)*0.01)\n",
"b1=nn.Parameter(torch.zeros(num_hiddens,requires_grad=True))\n",
"W2 = nn.Parameter(torch.randn(num_hiddens,num_outputs,requires_grad=True)*0.01)\n",
"b2=nn.Parameter(torch.zeros(num_outputs,requires_grad=True))\n",
"params=[W1,b1,W2,b2]\n"
],
"id": "adcea8cd4ee792a8",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"def relu(X):\n",
" a = torch.zeros_like(X)\n",
" return torch.max(X,a)\n",
"def net(X):\n",
" X = X.reshape((-1,num_inputs))\n",
" H = relu(X@W1+b1)\n",
" return (H@W2+b2)\n",
"loss = nn.CrossEntropyLoss(reduction='none')\n",
"num_epochs,lr=10,0.05\n",
"updater=torch.optim.SGD(params,lr=lr)\n",
"#train_ch3(net,train_iter,test_iter,loss,num_epochs,updater)\n"
],
"id": "cfd81a0f16d0c573",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "predict_ch3(net, test_iter)",
"id": "f2ed2e9cee14c28a",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"net = nn.Sequential(nn.Flatten(),\n",
" nn.Linear(784,256),\n",
" nn.ReLU(),\n",
" nn.Linear(256,10))\n",
"def init_weights(m):\n",
" if type(m) == nn.Linear:\n",
" nn.init.normal_(m.weight,std=0.01)\n",
"\n",
"net.apply(init_weights)\n"
],
"id": "9e4ed6d103380bc7",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": [
"batch_size,lr, num_epochs=256,0.1,10\n",
"loss = nn.CrossEntropyLoss(reduction='none')\n",
"trainer = torch.optim.SGD(net.parameters(),lr=lr)\n",
"train_iter, test_iter = load_data_fashion_mnist(batch_size)\n",
"#train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)"
],
"id": "52d71c77c4f51e90",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "code",
"source": "",
"id": "94706c936b4be3e1",
"outputs": [],
"execution_count": null
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}