模型提升到50层

This commit is contained in:
yukun-hh 2026-04-16 14:12:25 +08:00
parent b2f7a9c172
commit cb6bdc7eb8
2 changed files with 113 additions and 78 deletions

184
Model.py
View file

@ -1,10 +1,8 @@
""" """
这个文件是模型的定义文件请不要擅自修改如有疑问微信群里反馈 模型定义文件 - 使用瓶颈结构 (Bottleneck) 的深度残差网络
单独运行本文件将会输出模型结构 目标约50层参数量约80M
目前的话是一个36层的模型模型总量应该是在80M左右 如果到时候还是欠拟合的话再考虑去做更深的结构
author : yukun-hh author : yukun-hh
date : 2026-4-10 date : 2026-4-10
""" """
import torch import torch
from torch import nn from torch import nn
@ -12,97 +10,135 @@ from torch.nn import functional as F
from torchsummary import summary from torchsummary import summary
class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 (reduce) -> 3x3 -> 1x1 (expand).

    Each convolution is followed by batch normalisation. The block emits
    ``mid_channels * expansion`` channels, and the shortcut branch is added
    to the main branch before the final ReLU.

    The shortcut is the plain identity when the input already has the output
    shape. Otherwise the ``downsample`` module is applied to it; when the
    caller does not supply one although the shapes differ, a 1x1 convolution
    + batch-norm projection is created here so the residual addition cannot
    fail with a shape mismatch.
    """

    expansion = 4  # output channels = mid_channels * expansion

    def __init__(self, in_channels, mid_channels, stride=1, downsample=None):
        """
        :param in_channels: number of channels of the incoming tensor
        :param mid_channels: number of channels after the 1x1 reduction
        :param stride: stride of the 3x3 convolution; a value above 1
            shrinks the spatial size
        :param downsample: optional module applied to the shortcut branch so
            that it matches the main branch; built automatically when it is
            omitted and ``stride != 1`` or the channel count changes
        """
        super().__init__()
        out_channels = mid_channels * self.expansion

        # Convolutions carry no bias: the batch norm that follows each one
        # has its own learnable shift.
        self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)
        self.conv2 = nn.Conv2d(mid_channels, mid_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_channels)
        self.conv3 = nn.Conv2d(mid_channels, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        # Without a projection the addition in forward() would raise as soon
        # as the shortcut and the main branch disagree in shape.
        if downsample is None and (stride != 1 or in_channels != out_channels):
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        identity = x

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        return self.relu(out)
class Net(nn.Module):
    """ResNet-style image classifier assembled from ``Bottleneck`` blocks.

    Layout: 7x7 stride-2 stem convolution with batch norm and ReLU, a 3x3
    stride-2 max pool, four residual stages, global average pooling and a
    linear classifier. The input is a batch of 3-channel images.

    NOTE(review): earlier notes for this model target roughly 50 layers and
    about 80M parameters and describe the network as widened. The default
    stage table below uses the standard 64/128/256/512 widths with
    3 + 4 + 14 + 3 = 24 bottleneck blocks; a hand count gives 74 weighted
    layers (stem + 72 block convolutions + classifier) and roughly 32.5M
    parameters. Confirm against the printed model summary before relying on
    either figure.
    """

    # One entry per stage: (number of blocks, mid channels, first-block stride).
    DEFAULT_LAYERS_CONFIG = (
        (3, 64, 1),     # stage2 -> 64 * 4 = 256 output channels
        (4, 128, 2),    # stage3 -> 128 * 4 = 512 output channels
        (14, 256, 2),   # stage4 -> 256 * 4 = 1024 output channels
        (3, 512, 2),    # stage5 -> 512 * 4 = 2048 output channels
    )

    def __init__(self, num_classes=4, layers_config=None):
        """
        :param num_classes: size of the classifier output
        :param layers_config: optional sequence of exactly four
            ``(num_blocks, mid_channels, stride)`` tuples, one per residual
            stage; defaults to ``DEFAULT_LAYERS_CONFIG``
        :raises ValueError: if ``layers_config`` does not describe four stages
        """
        super().__init__()
        if layers_config is None:
            layers_config = self.DEFAULT_LAYERS_CONFIG
        if len(layers_config) != 4:
            raise ValueError(
                f"layers_config must describe exactly 4 stages, got {len(layers_config)}"
            )

        # Stem: 7x7 convolution followed by max pooling (4x spatial reduction).
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Running channel count; _make_layer reads and updates it so that
        # each stage starts from the previous stage's output width.
        self.in_channels = 64
        self.stage2 = self._make_layer(layers_config[0])
        self.stage3 = self._make_layer(layers_config[1])
        self.stage4 = self._make_layer(layers_config[2])
        self.stage5 = self._make_layer(layers_config[3])

        # Global pooling and classifier. The classifier width follows the
        # last stage instead of a literal 2048, so a custom stage table
        # cannot silently desynchronise the two.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(self.in_channels, num_classes)

    def _make_layer(self, config):
        """
        Build one residual stage.

        :param config: ``(num_blocks, mid_channels, stride)``; the stride is
            applied by the first block of the stage only
        :return: ``nn.Sequential`` holding the stage's blocks
        """
        num_blocks, mid_channels, stride = config
        out_channels = mid_channels * Bottleneck.expansion

        # The first block may change resolution and/or width, in which case
        # its shortcut needs a 1x1 projection to match the main branch.
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        layers = [Bottleneck(self.in_channels, mid_channels, stride, downsample)]
        self.in_channels = out_channels

        # Remaining blocks keep both resolution and width.
        layers.extend(
            Bottleneck(self.in_channels, mid_channels)
            for _ in range(1, num_blocks)
        )
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of images to class logits of shape (batch, num_classes)."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.stage5(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)
if __name__ == '__main__':
    # Running this file directly prints a layer-by-layer summary of the model.
    # torchsummary's summary() defaults to device="cuda" and then builds its
    # dummy input on the GPU whenever CUDA is available, so the model and the
    # summary call are given the same device explicitly; leaving the model on
    # the CPU fails on a CUDA machine with an input/weight type mismatch.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = Net(num_classes=4).to(device)
    summary(model, input_size=(3, 256, 256), device=device)

View file

@ -80,7 +80,6 @@ def train(model, train_loader, val_loader, epochs=50, lr=0.001, device='cuda'):
# 1. 定义损失函数和优化器 # 1. 定义损失函数和优化器
criterion = nn.CrossEntropyLoss() # 多分类用交叉熵 criterion = nn.CrossEntropyLoss() # 多分类用交叉熵
# 优化器选择(推荐 Adam 或 SGD
# 或者使用 SGD + 动量 # 或者使用 SGD + 动量
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4) optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
@ -142,15 +141,15 @@ if __name__ == '__main__':
# 假设你的 dataloader 已经写好了 # 假设你的 dataloader 已经写好了
train_loader, val_loader, class_names = create_dataloaders( train_loader, val_loader, class_names = create_dataloaders(
data_root='../trash_division_data/ultimate_4_class/', # 与trash-division同级文件夹 data_root='../trash_division_data/ultimate_4_class/', # 与trash-division同级文件夹
batch_size=32, # 根据你的显存调整 batch_size=16, # 根据你的显存调整
image_size=256, # 与你模型输入一致 image_size=256, # 与你模型输入一致
num_workers=4, # Windows 可能需设为 0 num_workers=8, # Windows 可能需设为 0
augment=True # 训练时使用数据增强 augment=True # 训练时使用数据增强
) )
# 1. 创建模型 # 1. 创建模型
device = torch.device('cuda' if torch.cuda.is_available() else 'xpu' if torch.xpu.is_available() else 'cpu') device = torch.device('cuda' if torch.cuda.is_available() else 'xpu' if torch.xpu.is_available() else 'cpu')
model = Net() # 根据你的 Net 类调整 model = Net(num_classes=4) # 根据你的 Net 类调整
model = model.to(device) model = model.to(device)
# 打印模型信息 # 打印模型信息