From 4575f3390fc6f95e4173f3644e8bcc77d9b92423 Mon Sep 17 00:00:00 2001 From: yukun-hh Date: Tue, 12 May 2026 15:56:28 +0800 Subject: [PATCH 1/2] refactor: replace custom Bottleneck model with standard ResNet-34 + Dropout --- Model.py | 99 +++++++++++++++++++------------------------------------ README.md | 27 ++++++++------- 2 files changed, 47 insertions(+), 79 deletions(-) diff --git a/Model.py b/Model.py index a28aa78..bf1a714 100644 --- a/Model.py +++ b/Model.py @@ -1,6 +1,5 @@ """ -模型定义文件 - 使用瓶颈结构 (Bottleneck) 的深度残差网络 -目标:约50层,参数量约80M +模型定义文件 - ResNet-34 author : yukun-hh date : 2026-4-10 """ @@ -10,27 +9,19 @@ from torch.nn import functional as F from torchsummary import summary -class Bottleneck(nn.Module): +class BasicBlock(nn.Module): """ - 瓶颈残差块:1x1(降维) -> 3x3 -> 1x1(升维) - 若需要下采样或通道变化,则在跳跃连接中使用1x1卷积 + ResNet-34 基础残差块:3x3 -> 3x3 + 若需要下采样或通道变化,则在跳跃连接中使用 1x1 卷积 """ - expansion = 4 # 输出通道是中间通道的4倍 + expansion = 1 - def __init__(self, in_channels, mid_channels, stride=1, downsample=None): - """ - :param in_channels: 输入通道数 - :param mid_channels: 中间层通道数(1x1降维后的通道数) - :param stride: 步长,用于下采样 - :param downsample: 下采样模块(当stride≠1或通道变化时使用) - """ + def __init__(self, in_channels, out_channels, stride=1, downsample=None): super().__init__() - self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(mid_channels) - self.conv2 = nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(mid_channels) - self.conv3 = nn.Conv2d(mid_channels, mid_channels * self.expansion, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(mid_channels * self.expansion) + self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(out_channels) + self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(out_channels) self.relu = nn.ReLU(inplace=True) self.downsample = downsample @@ -43,10 +34,6 @@ class Bottleneck(nn.Module): out = self.conv2(out) out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) if self.downsample is not None: identity = self.downsample(x) @@ -57,68 +44,49 @@ class Bottleneck(nn.Module): class Net(nn.Module): - """ - 基于 Bottleneck 的 ResNet 风格模型 - 各阶段配置仿照 ResNet-50,适当调整宽度以达到约80M参数 - """ - def __init__(self, num_classes=4): + def __init__(self, num_classes=4, dropout=0.5): super().__init__() - # 第一阶段:7x7卷积 + 最大池化 self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = nn.BatchNorm2d(64) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - # 残差阶段定义 - # 每个阶段的参数:[块数, 中间通道数, 步长] - # 为了达到80M参数,我们略微加宽网络(相比标准ResNet-50) layers_config = [ - (3, 64, 1), # stage2: 3个瓶颈块,输出通道 64*4=256 - (4, 128, 2), # stage3: 4个瓶颈块,输出通道 128*4=512 - (14, 256, 2), # stage4: 14个瓶颈块,输出通道 256*4=1024(加深至此阶段) - (3, 512, 2) # stage5: 3个瓶颈块,输出通道 512*4=2048 + (3, 64, 1), # layer1 + (4, 128, 2), # layer2 + (6, 256, 2), # layer3 + (3, 512, 2), # layer4 ] self.in_channels = 64 - self.stage2 = self._make_layer(layers_config[0]) - self.stage3 = self._make_layer(layers_config[1]) - self.stage4 = self._make_layer(layers_config[2]) - self.stage5 = self._make_layer(layers_config[3]) + self.layer1 = self._make_layer(layers_config[0]) + self.layer2 = self._make_layer(layers_config[1]) + self.layer3 = self._make_layer(layers_config[2]) + self.layer4 = self._make_layer(layers_config[3]) - # 全局池化与分类层 self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - self.fc = nn.Linear(2048, num_classes) + self.dropout = nn.Dropout(dropout) + self.fc = nn.Linear(512, num_classes) def _make_layer(self, config): - """ - 构建一个残差阶段 - :param config: (块数, 中间通道数, 第一阶段步长) - :return: nn.Sequential - """ - num_blocks, mid_channels, stride = config + num_blocks, out_channels, stride = config downsample = None layers = [] - # 第一个块可能需要下采样和通道匹配 - if stride != 1 or self.in_channels != mid_channels * Bottleneck.expansion: + if stride != 1 or self.in_channels != out_channels: downsample = nn.Sequential( - nn.Conv2d(self.in_channels, mid_channels * Bottleneck.expansion, + nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(mid_channels * Bottleneck.expansion), + nn.BatchNorm2d(out_channels), ) - layers.append( - Bottleneck(self.in_channels, mid_channels, stride, downsample) - ) - self.in_channels = mid_channels * Bottleneck.expansion + layers.append(BasicBlock(self.in_channels, out_channels, stride, downsample)) + self.in_channels = out_channels - # 后续块 for _ in range(1, num_blocks): - layers.append( - Bottleneck(self.in_channels, mid_channels) - ) + layers.append(BasicBlock(self.in_channels, out_channels)) return nn.Sequential(*layers) @@ -128,17 +96,18 @@ class Net(nn.Module): x = self.relu(x) x = self.maxpool(x) - x = self.stage2(x) - x = self.stage3(x) - x = self.stage4(x) - x = self.stage5(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) x = self.avgpool(x) x = torch.flatten(x, 1) + x = self.dropout(x) x = self.fc(x) return x if __name__ == '__main__': model = Net(num_classes=4) - summary(model, input_size=(3, 256, 256)) \ No newline at end of file + summary(model, input_size=(3, 256, 256)) diff --git a/README.md b/README.md index c3cc2dc..7e89626 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ > 同济大学 Python 人工智能程序设计课程小组作业 -基于自定义 ResNet 风格 Bottleneck 架构的 CNN 模型(约 80M 参数),将生活垃圾分为厨余垃圾、可回收物、其他垃圾、有害垃圾四个类别,输入为 256×256 RGB 图像。 +基于 ResNet-34 架构的 CNN 模型(约 21M 参数),将生活垃圾分为厨余垃圾、可回收物、其他垃圾、有害垃圾四个类别,输入为 256×256 RGB 图像。 --- @@ -25,7 +25,7 @@ ## 项目特点 - **四类垃圾分类**:厨余垃圾(1)、可回收物(2)、其他垃圾(3)、有害垃圾(4) -- **自定义 ResNet Bottleneck 架构**:约 80M 参数,50 层深度残差网络 +- **ResNet-34 架构**:约 21M 参数,34 层深度残差网络,含 Dropout 正则化 - **数据增强**:训练时使用随机裁剪、水平翻转、旋转、色彩抖动 - **Macro-F1 评估**:采用宏平均 F1 分数作为主要评估指标,兼顾各类别表现 - **类别加权损失**:自动计算类别权重,缓解类别不平衡问题 @@ -35,28 +35,27 @@ ## 模型架构 -模型基于残差网络(ResNet)的 Bottleneck 构建块设计。 +模型基于标准 ResNet-34 架构,使用 BasicBlock 构建。 -### Bottleneck 块 +### BasicBlock 块 -每个 Bottleneck 块包含三个卷积层: +每个 BasicBlock 包含两个 3x3 卷积层 + 跳跃连接: | 层 | 卷积 | 作用 | |---|---|---| -| 1x1 Conv | 降维 | 减少通道数,降低计算量 | -| 3x3 Conv | 特征提取 | 核心卷积操作 | -| 1x1 Conv | 升维 (x4) | 恢复通道数至输入的 4 倍 | +| 3x3 Conv | 特征提取 | 第一层卷积 | +| 3x3 Conv | 特征提取 | 第二层卷积 | ### 网络结构 | 阶段 | 块数 | 输出通道数 | 说明 | |---|---|---|---| | 初始层 | - | 64 | 7x7 Conv, stride=2 + MaxPool | -| Stage 1 | 3 | 256 | 第一个残差阶段 | -| Stage 2 | 4 | 512 | - | -| Stage 3 | 14 | 1024 | 最深阶段(比 ResNet-50 加深) | -| Stage 4 | 3 | 2048 | 最终残差阶段 | -| 分类头 | - | 4 | 全局平均池化 + 全连接层 | +| Layer1 | 3 | 64 | 第一个残差阶段 | +| Layer2 | 4 | 128 | - | +| Layer3 | 6 | 256 | - | +| Layer4 | 3 | 512 | 最终残差阶段 | +| 分类头 | - | 4 | 全局平均池化 + Dropout + 全连接层 | ## 数据集 @@ -111,7 +110,7 @@ |---|---| | `Train.py` | 训练主脚本,包含训练循环、验证、评估 | | `Dataloader.py` | 数据加载模块,包含 RobustImageFolder 和 DataLoader 创建 | -| `Model.py` | 模型定义,Bottleneck 残差块 + Net 主模型 | +| `Model.py` | 模型定义,ResNet-34(BasicBlock)+ Dropout | | `Merge_classes.py` | 数据集预处理,265 类合并为 4 类 | | `best_model.pth` | 训练好的最佳模型权重(约 125 MB) | | `AGENTS.md` | AI 助手指南(开发辅助) | From ce0c6da36a0f447e3e7f316cba8e1c17c02ae8af Mon Sep 17 00:00:00 2001 From: ywd09 Date: Thu, 14 May 2026 00:37:04 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E8=AF=84=E4=BC=B0?= =?UTF-8?q?=E7=A8=8B=E5=BA=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Curve.py | 50 ++++++++++++++++++ Evaluate.py | 147 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 197 insertions(+) create mode 100644 Curve.py create mode 100644 Evaluate.py diff --git a/Curve.py b/Curve.py new file mode 100644 index 0000000..7c38b37 --- /dev/null +++ b/Curve.py @@ -0,0 +1,50 @@ +""" +plot_training_curves.py +从 training_log.csv 读取日志,绘制 Loss / F1 / Accuracy / LR 曲线 +""" + +import pandas as pd +import matplotlib.pyplot as plt +import matplotlib + +matplotlib.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans'] +matplotlib.rcParams['axes.unicode_minus'] = False + +# ============ 读取数据 ============ +df = pd.read_csv('training_log.csv') +best_rows = df[df['best'] == 'best'] + +fig, axes = plt.subplots(2, 2, figsize=(14, 10)) + +# ---- 1. Loss ---- +ax = axes[0, 0] +ax.plot(df['epoch'], df['train_loss'], label='Train Loss', color='#1f77b4', lw=1.5) +ax.plot(df['epoch'], df['val_loss'], label='Val Loss', color='#ff7f0e', lw=1.5) +ax.set_xlabel('Epoch'); ax.set_ylabel('Loss'); ax.set_title('Loss vs Epoch') +ax.legend(); ax.grid(True, alpha=0.3) + +# ---- 2. F1 Score ---- +ax = axes[0, 1] +ax.plot(df['epoch'], df['train_f1'], label='Train F1', color='#1f77b4', lw=1.5) +ax.plot(df['epoch'], df['val_f1'], label='Val F1', color='#ff7f0e', lw=1.5) +ax.set_xlabel('Epoch'); ax.set_ylabel('F1 Score'); ax.set_title('F1 Score vs Epoch') +ax.legend(); ax.grid(True, alpha=0.3) + +# ---- 3. Accuracy ---- +ax = axes[1, 0] +ax.plot(df['epoch'], df['train_acc'], label='Train Acc', color='#1f77b4', lw=1.5) +ax.plot(df['epoch'], df['val_acc'], label='Val Acc', color='#ff7f0e', lw=1.5) +ax.set_xlabel('Epoch'); ax.set_ylabel('Accuracy (%)'); ax.set_title('Accuracy vs Epoch') +ax.legend(); ax.grid(True, alpha=0.3) + +# ---- 4. Learning Rate ---- +ax = axes[1, 1] +ax.plot(df['epoch'], df['lr'], color='#2ca02c', lw=1.5) +ax.set_xlabel('Epoch'); ax.set_ylabel('Learning Rate'); ax.set_title('Learning Rate vs Epoch') +ax.ticklabel_format(style='scientific', axis='y', scilimits=(0, 0)) +ax.grid(True, alpha=0.3) + +plt.tight_layout() +plt.savefig('training_curves.png', dpi=150, bbox_inches='tight') +plt.show() +print("训练曲线已保存: training_curves.png") diff --git a/Evaluate.py b/Evaluate.py new file mode 100644 index 0000000..7c78f1c --- /dev/null +++ b/Evaluate.py @@ -0,0 +1,147 @@ +""" +evaluate_and_plot.py +加载模型,在验证集上推理,绘制混淆矩阵 / ROC / PR 曲线 +""" + +import os +import numpy as np +import matplotlib.pyplot as plt +import matplotlib + +import torch +from torch.utils.data import DataLoader +from torchvision import transforms +from sklearn.metrics import ( + confusion_matrix, ConfusionMatrixDisplay, + roc_curve, auc, + precision_recall_curve, average_precision_score, +) + +from Model import Net +from Dataloader import RobustImageFolder + +matplotlib.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans'] +matplotlib.rcParams['axes.unicode_minus'] = False + +# ============================================================ +# ★★★ 需要你修改的参数 ★★★ +# ============================================================ +MODEL_PATH = 'best_model.pth' # 模型权重路径 +DATA_ROOT = '../trash_division_data/ultimate_4_class/' # 数据集根目录 +BATCH_SIZE = 32 +IMAGE_SIZE = 256 +NUM_WORKERS = 4 +# ============================================================ + +# ---------- 1. 加载验证集 ---------- +val_transform = transforms.Compose([ + transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), +]) + +val_dataset = RobustImageFolder( + root=os.path.join(DATA_ROOT, 'val'), + transform=val_transform, +) +val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, + shuffle=False, num_workers=NUM_WORKERS, + pin_memory=True, drop_last=False) + +class_names = val_dataset.classes +num_classes = len(class_names) +print(f"类别: {class_names}") + +# ---------- 2. 加载模型 ---------- +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +model = Net(num_classes=num_classes) +state_dict = torch.load(MODEL_PATH, map_location=device) +if 'model_state_dict' in state_dict: + state_dict = state_dict['model_state_dict'] +elif 'model' in state_dict: + state_dict = state_dict['model'] +model.load_state_dict(state_dict) +model = model.to(device).eval() +print("模型加载完成") + +# ---------- 3. 推理 ---------- +all_labels = [] +all_probs = [] + +with torch.no_grad(): + for images, labels in val_loader: + images = images.to(device) + probs = torch.softmax(model(images), dim=1) + all_labels.append(labels.numpy()) + all_probs.append(probs.cpu().numpy()) + +all_labels = np.concatenate(all_labels) +all_probs = np.concatenate(all_probs) +all_preds = np.argmax(all_probs, axis=1) +print(f"推理完成, 共 {len(all_labels)} 样本") + +# ============================================================ +# ① 混淆矩阵 +# ============================================================ +cm = confusion_matrix(all_labels, all_preds) +fig, ax = plt.subplots(figsize=(8, 7)) +ConfusionMatrixDisplay(cm, display_labels=class_names).plot( + ax=ax, cmap='Blues', values_format='d', xticks_rotation=30) +ax.set_title('Confusion Matrix', fontsize=14) +plt.tight_layout() +plt.savefig('confusion_matrix.png', dpi=150, bbox_inches='tight') +plt.show() +print("混淆矩阵已保存: confusion_matrix.png") + +# ============================================================ +# ② ROC 曲线 (One-vs-Rest + Macro-average) +# ============================================================ +one_hot = np.eye(num_classes)[all_labels] +colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'] + +fig, ax = plt.subplots(figsize=(8, 7)) +fpr_d, tpr_d, auc_d = {}, {}, {} + +for i in range(num_classes): + fpr_d[i], tpr_d[i], _ = roc_curve(one_hot[:, i], all_probs[:, i]) + auc_d[i] = auc(fpr_d[i], tpr_d[i]) + ax.plot(fpr_d[i], tpr_d[i], color=colors[i], lw=2, + label=f'{class_names[i]} (AUC={auc_d[i]:.4f})') + +# Macro-average +all_fpr = np.unique(np.concatenate([fpr_d[i] for i in range(num_classes)])) +mean_tpr = sum(np.interp(all_fpr, fpr_d[i], tpr_d[i]) for i in range(num_classes)) / num_classes +macro_auc = auc(all_fpr, mean_tpr) +ax.plot(all_fpr, mean_tpr, 'navy', lw=2, ls='--', + label=f'Macro-avg (AUC={macro_auc:.4f})') +ax.plot([0, 1], [0, 1], 'k--', lw=1, alpha=0.5) + +ax.set_xlim(0, 1); ax.set_ylim(0, 1.05) +ax.set_xlabel('False Positive Rate'); ax.set_ylabel('True Positive Rate') +ax.set_title('ROC Curve', fontsize=14) +ax.legend(loc='lower right'); ax.grid(True, alpha=0.3) +plt.tight_layout() +plt.savefig('roc_curve.png', dpi=150, bbox_inches='tight') +plt.show() +print("ROC 曲线已保存: roc_curve.png") + +# ============================================================ +# ③ Precision-Recall 曲线 +# ============================================================ +fig, ax = plt.subplots(figsize=(8, 7)) + +for i in range(num_classes): + prec, rec, _ = precision_recall_curve(one_hot[:, i], all_probs[:, i]) + ap = average_precision_score(one_hot[:, i], all_probs[:, i]) + ax.plot(rec, prec, color=colors[i], lw=2, + label=f'{class_names[i]} (AP={ap:.4f})') + +ax.set_xlim(0, 1); ax.set_ylim(0, 1.05) +ax.set_xlabel('Recall'); ax.set_ylabel('Precision') +ax.set_title('Precision-Recall Curve', fontsize=14) +ax.legend(loc='best'); ax.grid(True, alpha=0.3) +plt.tight_layout() +plt.savefig('pr_curve.png', dpi=150, bbox_inches='tight') +plt.show() +print("PR 曲线已保存: pr_curve.png")