Merge branch 'Resnet34-test'
This commit is contained in:
commit
2f4e9df26e
4 changed files with 99 additions and 82 deletions
50
Curve.py
Normal file
50
Curve.py
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
"""
|
||||
plot_training_curves.py
|
||||
从 training_log.csv 读取日志,绘制 Loss / F1 / Accuracy / LR 曲线
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib
|
||||
|
||||
# Font fallback chain: SimHei is tried first, then DejaVu Sans.
matplotlib.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
# Render negative tick labels with an ASCII hyphen instead of the Unicode minus sign.
matplotlib.rcParams['axes.unicode_minus'] = False

# Line colours shared by every train/validation panel.
TRAIN_COLOR = '#1f77b4'
VAL_COLOR = '#ff7f0e'

# One entry per train/validation panel:
# (grid position, CSV column suffix, legend suffix, y-axis label, panel title)
METRIC_PANELS = [
    ((0, 0), 'loss', 'Loss', 'Loss', 'Loss vs Epoch'),
    ((0, 1), 'f1', 'F1', 'F1 Score', 'F1 Score vs Epoch'),
    ((1, 0), 'acc', 'Acc', 'Accuracy (%)', 'Accuracy vs Epoch'),
]

# ============ Load data ============
# Columns read below: epoch, train_loss, val_loss, train_f1, val_f1,
# train_acc, val_acc, lr.
df = pd.read_csv('training_log.csv')

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# ---- Panels 1-3: Loss / F1 Score / Accuracy (train vs. validation) ----
for position, column, legend_name, y_label, title in METRIC_PANELS:
    ax = axes[position]
    ax.plot(df['epoch'], df[f'train_{column}'],
            label=f'Train {legend_name}', color=TRAIN_COLOR, lw=1.5)
    ax.plot(df['epoch'], df[f'val_{column}'],
            label=f'Val {legend_name}', color=VAL_COLOR, lw=1.5)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

# ---- Panel 4: Learning Rate ----
ax = axes[1, 1]
ax.plot(df['epoch'], df['lr'], color='#2ca02c', lw=1.5)
ax.set_xlabel('Epoch')
ax.set_ylabel('Learning Rate')
ax.set_title('Learning Rate vs Epoch')
# scilimits=(0, 0) applies scientific notation to every y tick value.
ax.ticklabel_format(style='scientific', axis='y', scilimits=(0, 0))
ax.grid(True, alpha=0.3)

# Write the figure to disk before opening the interactive window.
plt.tight_layout()
plt.savefig('training_curves.png', dpi=150, bbox_inches='tight')
plt.show()
print("训练曲线已保存: training_curves.png")
|
||||
|
|
@ -28,7 +28,7 @@ matplotlib.rcParams['axes.unicode_minus'] = False
|
|||
# ============================================================
|
||||
MODEL_PATH = 'best_model.pth' # 模型权重路径
|
||||
DATA_ROOT = '../trash_division_data/ultimate_4_class/' # 数据集根目录
|
||||
BATCH_SIZE = 64
|
||||
BATCH_SIZE = 32
|
||||
IMAGE_SIZE = 256
|
||||
NUM_WORKERS = 4
|
||||
# ============================================================
|
||||
|
|
@ -54,8 +54,7 @@ num_classes = len(class_names)
|
|||
print(f"类别: {class_names}")
|
||||
|
||||
# ---------- 2. 加载模型 ----------
|
||||
device = torch.device('xpu' if torch.xpu.is_available() else 'cpu')
|
||||
print(device)
|
||||
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
model = Net(num_classes=num_classes)
|
||||
state_dict = torch.load(MODEL_PATH, map_location=device)
|
||||
if 'model_state_dict' in state_dict:
|
||||
|
|
|
|||
95
Model.py
95
Model.py
|
|
@ -1,6 +1,5 @@
|
|||
"""
|
||||
模型定义文件 - 使用瓶颈结构 (Bottleneck) 的深度残差网络
|
||||
目标:约50层,参数量约80M
|
||||
模型定义文件 - ResNet-34
|
||||
author : yukun-hh
|
||||
date : 2026-4-10
|
||||
"""
|
||||
|
|
@ -10,27 +9,19 @@ from torch.nn import functional as F
|
|||
from torchsummary import summary
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
|
||||
class BasicBlock(nn.Module):
|
||||
"""
|
||||
瓶颈残差块:1x1(降维) -> 3x3 -> 1x1(升维)
|
||||
ResNet-34 基础残差块:3x3 -> 3x3
|
||||
若需要下采样或通道变化,则在跳跃连接中使用 1x1 卷积
|
||||
"""
|
||||
expansion = 4 # 输出通道是中间通道的4倍
|
||||
expansion = 1
|
||||
|
||||
def __init__(self, in_channels, mid_channels, stride=1, downsample=None):
|
||||
"""
|
||||
:param in_channels: 输入通道数
|
||||
:param mid_channels: 中间层通道数(1x1降维后的通道数)
|
||||
:param stride: 步长,用于下采样
|
||||
:param downsample: 下采样模块(当stride≠1或通道变化时使用)
|
||||
"""
|
||||
def __init__(self, in_channels, out_channels, stride=1, downsample=None):
|
||||
super().__init__()
|
||||
self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(mid_channels)
|
||||
self.conv2 = nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=stride, padding=1, bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(mid_channels)
|
||||
self.conv3 = nn.Conv2d(mid_channels, mid_channels * self.expansion, kernel_size=1, bias=False)
|
||||
self.bn3 = nn.BatchNorm2d(mid_channels * self.expansion)
|
||||
self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(out_channels)
|
||||
self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(out_channels)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.downsample = downsample
|
||||
|
||||
|
|
@ -43,10 +34,6 @@ class Bottleneck(nn.Module):
|
|||
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv3(out)
|
||||
out = self.bn3(out)
|
||||
|
||||
if self.downsample is not None:
|
||||
identity = self.downsample(x)
|
||||
|
|
@ -57,68 +44,49 @@ class Bottleneck(nn.Module):
|
|||
|
||||
|
||||
class Net(nn.Module):
|
||||
"""
|
||||
基于 Bottleneck 的 ResNet 风格模型
|
||||
各阶段配置仿照 ResNet-50,适当调整宽度以达到约80M参数
|
||||
"""
|
||||
|
||||
def __init__(self, num_classes=4):
|
||||
def __init__(self, num_classes=4, dropout=0.5):
|
||||
super().__init__()
|
||||
|
||||
# 第一阶段:7x7卷积 + 最大池化
|
||||
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(64)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
|
||||
|
||||
# 残差阶段定义
|
||||
# 每个阶段的参数:[块数, 中间通道数, 步长]
|
||||
# 为了达到80M参数,我们略微加宽网络(相比标准ResNet-50)
|
||||
layers_config = [
|
||||
(3, 64, 1), # stage2: 3个瓶颈块,输出通道 64*4=256
|
||||
(4, 128, 2), # stage3: 4个瓶颈块,输出通道 128*4=512
|
||||
(14, 256, 2), # stage4: 14个瓶颈块,输出通道 256*4=1024(加深至此阶段)
|
||||
(3, 512, 2) # stage5: 3个瓶颈块,输出通道 512*4=2048
|
||||
(3, 64, 1), # layer1
|
||||
(4, 128, 2), # layer2
|
||||
(6, 256, 2), # layer3
|
||||
(3, 512, 2), # layer4
|
||||
]
|
||||
|
||||
self.in_channels = 64
|
||||
self.stage2 = self._make_layer(layers_config[0])
|
||||
self.stage3 = self._make_layer(layers_config[1])
|
||||
self.stage4 = self._make_layer(layers_config[2])
|
||||
self.stage5 = self._make_layer(layers_config[3])
|
||||
self.layer1 = self._make_layer(layers_config[0])
|
||||
self.layer2 = self._make_layer(layers_config[1])
|
||||
self.layer3 = self._make_layer(layers_config[2])
|
||||
self.layer4 = self._make_layer(layers_config[3])
|
||||
|
||||
# 全局池化与分类层
|
||||
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
|
||||
self.fc = nn.Linear(2048, num_classes)
|
||||
self.dropout = nn.Dropout(dropout)
|
||||
self.fc = nn.Linear(512, num_classes)
|
||||
|
||||
def _make_layer(self, config):
|
||||
"""
|
||||
构建一个残差阶段
|
||||
:param config: (块数, 中间通道数, 第一阶段步长)
|
||||
:return: nn.Sequential
|
||||
"""
|
||||
num_blocks, mid_channels, stride = config
|
||||
num_blocks, out_channels, stride = config
|
||||
downsample = None
|
||||
layers = []
|
||||
|
||||
# 第一个块可能需要下采样和通道匹配
|
||||
if stride != 1 or self.in_channels != mid_channels * Bottleneck.expansion:
|
||||
if stride != 1 or self.in_channels != out_channels:
|
||||
downsample = nn.Sequential(
|
||||
nn.Conv2d(self.in_channels, mid_channels * Bottleneck.expansion,
|
||||
nn.Conv2d(self.in_channels, out_channels,
|
||||
kernel_size=1, stride=stride, bias=False),
|
||||
nn.BatchNorm2d(mid_channels * Bottleneck.expansion),
|
||||
nn.BatchNorm2d(out_channels),
|
||||
)
|
||||
|
||||
layers.append(
|
||||
Bottleneck(self.in_channels, mid_channels, stride, downsample)
|
||||
)
|
||||
self.in_channels = mid_channels * Bottleneck.expansion
|
||||
layers.append(BasicBlock(self.in_channels, out_channels, stride, downsample))
|
||||
self.in_channels = out_channels
|
||||
|
||||
# 后续块
|
||||
for _ in range(1, num_blocks):
|
||||
layers.append(
|
||||
Bottleneck(self.in_channels, mid_channels)
|
||||
)
|
||||
layers.append(BasicBlock(self.in_channels, out_channels))
|
||||
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
|
|
@ -128,13 +96,14 @@ class Net(nn.Module):
|
|||
x = self.relu(x)
|
||||
x = self.maxpool(x)
|
||||
|
||||
x = self.stage2(x)
|
||||
x = self.stage3(x)
|
||||
x = self.stage4(x)
|
||||
x = self.stage5(x)
|
||||
x = self.layer1(x)
|
||||
x = self.layer2(x)
|
||||
x = self.layer3(x)
|
||||
x = self.layer4(x)
|
||||
|
||||
x = self.avgpool(x)
|
||||
x = torch.flatten(x, 1)
|
||||
x = self.dropout(x)
|
||||
x = self.fc(x)
|
||||
return x
|
||||
|
||||
|
|
|
|||
27
README.md
27
README.md
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
> 同济大学 Python 人工智能程序设计课程小组作业
|
||||
|
||||
基于自定义 ResNet 风格 Bottleneck 架构的 CNN 模型(约 80M 参数),将生活垃圾分为厨余垃圾、可回收物、其他垃圾、有害垃圾四个类别,输入为 256×256 RGB 图像。
|
||||
基于 ResNet-34 架构的 CNN 模型(约 21M 参数),将生活垃圾分为厨余垃圾、可回收物、其他垃圾、有害垃圾四个类别,输入为 256×256 RGB 图像。
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -25,7 +25,7 @@
|
|||
## 项目特点
|
||||
|
||||
- **四类垃圾分类**:厨余垃圾(1)、可回收物(2)、其他垃圾(3)、有害垃圾(4)
|
||||
- **自定义 ResNet Bottleneck 架构**:约 80M 参数,50 层深度残差网络
|
||||
- **ResNet-34 架构**:约 21M 参数,34 层深度残差网络,含 Dropout 正则化
|
||||
- **数据增强**:训练时使用随机裁剪、水平翻转、旋转、色彩抖动
|
||||
- **Macro-F1 评估**:采用宏平均 F1 分数作为主要评估指标,兼顾各类别表现
|
||||
- **类别加权损失**:自动计算类别权重,缓解类别不平衡问题
|
||||
|
|
@ -35,28 +35,27 @@
|
|||
|
||||
## 模型架构
|
||||
|
||||
模型基于残差网络(ResNet)的 Bottleneck 构建块设计。
|
||||
模型基于标准 ResNet-34 架构,使用 BasicBlock 构建。
|
||||
|
||||
### Bottleneck 块
|
||||
### BasicBlock 块
|
||||
|
||||
每个 Bottleneck 块包含三个卷积层:
|
||||
每个 BasicBlock 包含两个 3x3 卷积层 + 跳跃连接:
|
||||
|
||||
| 层 | 卷积 | 作用 |
|
||||
|---|---|---|
|
||||
| 1x1 Conv | 降维 | 减少通道数,降低计算量 |
|
||||
| 3x3 Conv | 特征提取 | 核心卷积操作 |
|
||||
| 1x1 Conv | 升维 (x4) | 恢复通道数至输入的 4 倍 |
|
||||
| 3x3 Conv | 特征提取 | 第一层卷积 |
|
||||
| 3x3 Conv | 特征提取 | 第二层卷积 |
|
||||
|
||||
### 网络结构
|
||||
|
||||
| 阶段 | 块数 | 输出通道数 | 说明 |
|
||||
|---|---|---|---|
|
||||
| 初始层 | - | 64 | 7x7 Conv, stride=2 + MaxPool |
|
||||
| Stage 1 | 3 | 256 | 第一个残差阶段 |
|
||||
| Stage 2 | 4 | 512 | - |
|
||||
| Stage 3 | 14 | 1024 | 最深阶段(比 ResNet-50 加深) |
|
||||
| Stage 4 | 3 | 2048 | 最终残差阶段 |
|
||||
| 分类头 | - | 4 | 全局平均池化 + 全连接层 |
|
||||
| Layer1 | 3 | 64 | 第一个残差阶段 |
|
||||
| Layer2 | 4 | 128 | - |
|
||||
| Layer3 | 6 | 256 | - |
|
||||
| Layer4 | 3 | 512 | 最终残差阶段 |
|
||||
| 分类头 | - | 4 | 全局平均池化 + Dropout + 全连接层 |
|
||||
|
||||
## 数据集
|
||||
|
||||
|
|
@ -111,7 +110,7 @@
|
|||
|---|---|
|
||||
| `Train.py` | 训练主脚本,包含训练循环、验证、评估 |
|
||||
| `Dataloader.py` | 数据加载模块,包含 RobustImageFolder 和 DataLoader 创建 |
|
||||
| `Model.py` | 模型定义,Bottleneck 残差块 + Net 主模型 |
|
||||
| `Model.py` | 模型定义,ResNet-34(BasicBlock)+ Dropout |
|
||||
| `Merge_classes.py` | 数据集预处理,265 类合并为 4 类 |
|
||||
| `best_model.pth` | 训练好的最佳模型权重(约 125 MB) |
|
||||
| `AGENTS.md` | AI 助手指南(开发辅助) |
|
||||
|
|
|
|||
Loading…
Reference in a new issue