ResNet50的猫狗分类训练及预测
相比于之前写的ResNet18,下面的ResNet50写得更加工程化一点,这还适用于其他分类任务,换一个分类任务训练时只需要修改图片数据的路径即可。
我的代码文件结构
1. 数据处理
首先已经对数据做好了分类
文件夹结构是这样
开始划分数据集
split_data.py
import os
import random
import shutil


def move_file(target_path, save_train_path, save_val_path, scale=0.1):
    """Randomly split the files of one class folder into train/val copies.

    Args:
        target_path: directory holding the source images of a single class.
        save_train_path: destination directory for training files.
        save_val_path: destination directory for validation files.
        scale: fraction of the files copied to the validation set.
    """
    file_list = os.listdir(target_path)
    random.shuffle(file_list)
    # first `number` shuffled files go to val, the rest to train
    number = int(len(file_list) * scale)
    train_list = file_list[number:]
    val_list = file_list[:number]
    for file in train_list:
        target_file_path = os.path.join(target_path, file)
        save_file_path = os.path.join(save_train_path, file)
        shutil.copyfile(target_file_path, save_file_path)
    for file in val_list:
        target_file_path = os.path.join(target_path, file)
        save_file_path = os.path.join(save_val_path, file)
        shutil.copyfile(target_file_path, save_file_path)


def split_classify_data(base_path, save_path, scale=0.1):
    """Split every class folder under base_path into train/ and val/ trees.

    Produces save_path/train/<class>/ and save_path/val/<class>/ for each
    class folder found directly under base_path.
    """
    folder_list = os.listdir(base_path)
    for folder in folder_list:
        target_path = os.path.join(base_path, folder)
        save_train_path = os.path.join(save_path, 'train', folder)
        save_val_path = os.path.join(save_path, 'val', folder)
        # exist_ok avoids the race-prone exists()/makedirs() pair
        os.makedirs(save_train_path, exist_ok=True)
        os.makedirs(save_val_path, exist_ok=True)
        move_file(target_path, save_train_path, save_val_path, scale)
        print(folder, 'finish!')


if __name__ == '__main__':
    base_path = r'C:\Users\Administrator.DESKTOP-161KJQD\Desktop\save_dir'
    save_path = r'C:\Users\Administrator.DESKTOP-161KJQD\Desktop\dog_cat'
    # validation split ratio
    scale = 0.1
    split_classify_data(base_path, save_path, scale)
运行完以上代码得到的文件夹结构
一个训练集数据,一个验证集数据
2.数据集的导入
我这个文件写了一个数据集的导入和一个学习率更新的函数。数据导入是通用的
tools.py
import os
import time

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.autograd.variable import Variable
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ExponentialLR, LambdaLR
from torchvision.models import ResNet50_Weights
from tqdm import tqdm

from classify_cfg import *

mean = MEAN
std = STD


def get_dataset(base_dir='', input_size=160):
    """Build the ImageFolder train/val datasets.

    Args:
        base_dir: root directory containing 'train' and 'val' subfolders,
            each holding one folder per class.
        input_size: target side length of the (square) network input.

    Returns:
        (dataset_dict, classes, classes_num) where dataset_dict maps
        'train'/'val' to the corresponding ImageFolder datasets.
    """
    dateset = dict()
    transform_train = transforms.Compose([
        # resize so the shorter side equals input_size
        transforms.Resize(input_size),
        transforms.RandomRotation(15),
        # source images differ in size, so crop to input_size x input_size
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])
    # Bug fix: the original also applied RandomRotation(15) here. Random
    # augmentation at evaluation time makes validation metrics noisy and
    # non-reproducible; the val pipeline must be deterministic.
    transform_val = transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])
    base_dir_train = os.path.join(base_dir, 'train')
    train_dataset = datasets.ImageFolder(root=base_dir_train, transform=transform_train)
    classes = train_dataset.classes
    classes_num = len(train_dataset.classes)
    base_dir_val = os.path.join(base_dir, 'val')
    val_dataset = datasets.ImageFolder(root=base_dir_val, transform=transform_val)
    dateset['train'] = train_dataset
    dateset['val'] = val_dataset
    return dateset, classes, classes_num


def update_lr(epoch, epochs):
    """Learning-rate multiplier for LambdaLR.

    With epochs=100 and base lr=0.001:
        epoch < 33        -> lr * 1
        33 <= epoch <= 66 -> lr * 0.5
        epoch > 66        -> lr * 0.1
    """
    if epoch == 0 or epochs // 3 > epoch:
        return 1
    elif (epochs // 3 * 2 >= epoch) and (epochs // 3 <= epoch):
        return 0.5
    else:
        return 0.1
3.训练模型
数据集导入好了以后,选择模型,选择优化器等等,然后开始训练。
mytrain.py
import os
import time

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.autograd.variable import Variable
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ExponentialLR, LambdaLR
from torchvision.models import ResNet50_Weights

from classify_cfg import *
from tools import get_dataset, update_lr


def train(model, dateset, epochs, batch_size, device, optimizer, scheduler, criterion, save_path):
    """Train `model` on dateset['train'], validate on dateset['val'].

    Writes TensorBoard scalars and a plain-text log.txt to save_path, and
    saves the whole model as best.pt (highest train accuracy so far) and
    last.pt (final epoch).

    Args:
        model: the network to train (already on `device`).
        dateset: dict with 'train' and 'val' Dataset objects.
        epochs: number of epochs to run.
        batch_size: DataLoader batch size.
        device: torch device for data and model.
        optimizer: optimizer stepping `model`'s parameters.
        scheduler: LR scheduler stepped once per epoch.
        criterion: loss function.
        save_path: output directory for logs and checkpoints.
    """
    train_loader = DataLoader(dateset.get('train'), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(dateset.get('val'), batch_size=batch_size, shuffle=True)
    # TensorBoard writer and plain-text training log
    write = SummaryWriter(save_path)
    best_acc = 0
    # context manager guarantees the log file is closed even on exceptions
    with open(os.path.join(save_path, 'log.txt'), 'w', encoding='utf-8') as f:
        for epoch in range(epochs):
            model.train()
            train_correct = 0.0
            sum_loss = 0.0
            accuracy = -1
            total_num = len(train_loader.dataset)
            batch_count = 0
            for batch_idx, (data, target) in enumerate(train_loader):
                start_time = time.time()
                # Variable is deprecated since PyTorch 0.4 — .to(device) suffices
                data, target = data.to(device), target.to(device)
                output = model(data)
                loss = criterion(output, target)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                sum_loss += loss.item()
                train_predict = torch.max(output.data, 1)[1]
                # both tensors already live on `device`; no cuda/cpu branching needed,
                # and .item() keeps the accumulator a plain Python number
                train_correct += (train_predict == target).sum().item()
                accuracy = train_correct / total_num * 100
                batch_count += len(data)
                end_time = time.time()
                s = (f'Epoch:[{epoch + 1}/{epochs}] Batch:[{batch_count}/{total_num}] '
                     f'train_acc: {accuracy:.2f} train_loss: {loss.item():.3f} '
                     f'time: {int((end_time - start_time) * 1000)} ms')
                print(s)
                f.write(s + '\n')
            write.add_scalar('train_acc', accuracy, epoch)
            write.add_scalar('train_loss', loss.item(), epoch)
            scheduler.step()
            if best_acc < accuracy:
                best_acc = accuracy
                # saves the entire model object, matching mypredict's torch.load
                torch.save(model, os.path.join(save_path, 'best.pt'))
            if epoch + 1 == epochs:
                torch.save(model, os.path.join(save_path, 'last.pt'))
            # ---------------- validation ----------------
            model.eval()
            test_loss = 0.0
            correct = 0.0
            total_num = len(val_loader.dataset)
            with torch.no_grad():
                for data, target in val_loader:
                    data, target = data.to(device), target.to(device)
                    output = model(data)
                    loss = criterion(output, target)
                    _, pred = torch.max(output.data, 1)
                    correct += torch.sum(pred == target).item()
                    test_loss += loss.item()
            acc = correct / total_num * 100
            avg_loss = test_loss / len(val_loader)
            s = f'val acc: {acc:.2f} val loss: {avg_loss:.3f}'
            print(s)
            f.write(s + '\n')
            write.add_scalar('val_acc', acc, epoch)
            write.add_scalar('val_loss', avg_loss, epoch)
    write.close()


if __name__ == '__main__':
    device = DEVICE
    epochs = EPOCHS
    batch_size = BATCH_SIZE
    input_size = INPUT_SIZE
    lr = LR
    # ------------------------- training -------------------------
    # image data root and checkpoint/log output directory
    base_dir = r'C:\Users\Administrator.DESKTOP-161KJQD\Desktop\dog_cat'
    save_path = r'C:\Users\Administrator.DESKTOP-161KJQD\Desktop\dog_cat_save'
    dateset, classes, classes_num = get_dataset(base_dir, input_size=input_size)
    # model = torchvision.models.resnet50(pretrained=True)  # deprecated API
    model = torchvision.models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
    # replace the 1000-class ImageNet head with one matching our class count
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, classes_num)
    model.to(DEVICE)
    # cross-entropy loss for classification
    criteon = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)
    # scheduler = ExponentialLR(optimizer, gamma=0.9)
    scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: update_lr(epoch, epochs))
    train(model, dateset, epochs, batch_size, device, optimizer, scheduler, criteon, save_path)
    # persist the label list next to the checkpoints
    with open(os.path.join(save_path, 'labels.txt'), 'w', encoding='utf-8') as f:
        f.write(f'{classes_num} {classes}')
训练结束以后,在保存路径下会得到下面的文件
最好的模型,最后一次的模型,标签的列表,训练的记录和tensorboard记录
在该路径下执行 tensorboard --logdir=.
然后在浏览器打开给出的地址,即可看到数据训练过程的绘图
4.对图片进行预测
考虑对于用户来说,用户是在网页或者手机上上传一张图片进行预测,所以这边是采用二进制数据。
mypredict.py
import cv2
import numpy as np
import torch

from classify_cfg import *

# Bind at module level so img_process also works when this file is imported
# (originally these were only assigned inside the __main__ guard, so importing
# img_process from another module raised NameError).
mean = MEAN
std = STD


def img_process(img_betys, img_size, device):
    """Decode raw image bytes into a normalized NCHW float tensor.

    Args:
        img_betys: raw encoded image bytes (e.g. an uploaded .jpg's content).
        img_size: side length the image is resized to (square).
        device: torch device the returned tensor is moved to.

    Returns:
        A (1, 3, img_size, img_size) float tensor on `device`.
    """
    img_arry = np.asarray(bytearray(img_betys), dtype='uint8')
    im0 = cv2.imdecode(img_arry, cv2.IMREAD_COLOR)
    # Bug fix: cv2 decodes to BGR, but the model was trained through
    # torchvision ImageFolder (PIL -> RGB) with RGB mean/std, so the
    # channels must be converted before normalization.
    im0 = cv2.cvtColor(im0, cv2.COLOR_BGR2RGB)
    # NOTE(review): plain resize distorts aspect ratio, while training used
    # Resize + CenterCrop — consider matching the training pipeline.
    image = cv2.resize(im0, (img_size, img_size))
    image = np.float32(image) / 255.0
    image[:, :, ] -= np.float32(mean)
    image[:, :, ] /= np.float32(std)
    image = image.transpose((2, 0, 1))  # HWC -> CHW
    im = torch.from_numpy(image).unsqueeze(0)  # add batch dimension
    im = im.to(device)
    return im


def predict(model_path, img, device):
    """Load a whole saved model and return the predicted class index.

    Args:
        model_path: path to a model saved with torch.save(model, ...).
        img: preprocessed input tensor from img_process.
        device: torch device to run inference on.
    """
    model = torch.load(model_path)
    model.to(device)
    model.eval()
    predicts = model(img)
    _, preds = torch.max(predicts, 1)
    pred = torch.squeeze(preds)
    return pred


if __name__ == '__main__':
    mean = MEAN
    std = STD
    device = DEVICE
    classes = ['狗', '猫']
    # ---- prediction ----
    model_path = r'C:\Users\Administrator.DESKTOP-161KJQD\Desktop\dog_cat_save\best.pt'
    img_path = r'C:\Users\Administrator.DESKTOP-161KJQD\Desktop\save_dir\狗\000000.jpg'
    with open(img_path, 'rb') as f:
        img_betys = f.read()
    img = img_process(img_betys, 160, device)
    pred = predict(model_path, img, device)
    print(classes[int(pred)])
还有我的配置文件classify_cfg.py
import torch

# Training configuration shared by tools.py / mytrain.py / mypredict.py.
BATCH_SIZE = 2                # samples per batch
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # GPU if available
EPOCHS = 30                   # number of passes over the training set
LR = 1e-3                     # initial learning rate
INPUT_SIZE = 160              # network input side length (pixels)
MEAN = [0.485, 0.456, 0.406]  # per-channel normalization mean
STD = [0.229, 0.224, 0.225]   # per-channel normalization std