寻找网站建设员广州网站建设专注乐云seo
news/
2025/9/22 21:23:14/
文章来源:
寻找网站建设员,广州网站建设专注乐云seo,一元钱购买网站空间,广州公关公司基于WIN10的64位系统演示
一、写在前面
本期开始，我们继续学习深度学习图像目标检测系列，SSD（Single Shot MultiBox Detector）模型。 二、SSD简介
SSD（Single Shot MultiBox Detector）是一种流行的目标检…基于WIN10的64位系统演示
一、写在前面
本期开始，我们继续学习深度学习图像目标检测系列，SSD（Single Shot MultiBox Detector）模型。 二、SSD简介
SSD（Single Shot MultiBox Detector）是一种流行的目标检测算法，由 Wei Liu, Dragomir Anguelov, Dumitru Erhan 等人于 2016 年提出。它是一种单阶段的目标检测算法，与当时流行的两阶段检测器（如 Faster R-CNN）相比，SSD 提供了更快的检测速度，同时仍然具有较高的准确性。
以下是 SSD 的主要特点和组件：
（1）多尺度特征映射：
SSD 从不同的层级提取特征图，这使得它能够有效地检测不同大小的物体。这是通过在多个特征图上执行预测来实现的，其中每个特征图代表不同的尺度。
（2）默认框（或称为先验框、锚框）：
在每个特征图位置，SSD 定义了多个具有不同形状和大小的默认框。这些默认框用于与真实边界框进行匹配，并提供回归目标，以调整预测的边界框大小和位置。
（3）单阶段检测器：
与两阶段检测器不同，SSD 在单个前向传递中同时进行边界框回归和类别分类，从而实现了速度和准确性之间的平衡。
（4）损失函数：
SSD 使用了组合损失，包括边界框回归的平滑 L1 损失和类别预测的交叉熵损失。
（5）数据增强：
为了提高模型的性能，SSD 使用了多种数据增强技术，包括随机裁剪、缩放和颜色扭曲等。
（6）模型骨干：
原始的 SSD 使用 VGG-16 作为其骨干网络，但后续的变种（如 SSDlite）使用了更轻量级的骨干网络，如 MobileNet。 三、数据源
来源于公共数据，文件设置如下： 大概的任务就是用一个框框标记出MTB的位置。 四、SSD实战
直接上代码：
import os
import random
import torch
import torchvision
from torchvision.models.detection import ssd300_vgg16
from torchvision.transforms import functional as F
from PIL import Image
from torch.utils.data import DataLoader
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
from torchvision import transforms
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np# Function to parse XML annotations
def parse_xml(xml_path):
    """Read the bounding boxes stored in an XML annotation file.

    Each ``<object>`` element is expected to contain a ``<bndbox>`` child
    with integer ``xmin``/``ymin``/``xmax``/``ymax`` entries.

    Args:
        xml_path: Path of the annotation file to parse.

    Returns:
        A list of ``(xmin, ymin, xmax, ymax)`` integer tuples. Boxes whose
        width or height is not strictly positive are skipped and a warning
        is printed for each of them.
    """
    root = ET.parse(xml_path).getroot()
    boxes = []
    for obj in root.findall("object"):
        bndbox = obj.find("bndbox")
        xmin = int(bndbox.find("xmin").text)
        ymin = int(bndbox.find("ymin").text)
        xmax = int(bndbox.find("xmax").text)
        ymax = int(bndbox.find("ymax").text)

        # Check if the bounding box is valid (non-degenerate in both axes).
        if xmin < xmax and ymin < ymax:
            boxes.append((xmin, ymin, xmax, ymax))
        else:
            print(f"Warning: Ignored invalid box in {xml_path} - ({xmin}, {ymin}, {xmax}, {ymax})")
    return boxes


# Function to split data into training and validation sets
def split_data(image_dir, split_ratio=0.8):
    """Randomly split the ``.jpg`` files of a directory into train/val lists.

    Args:
        image_dir: Directory whose ``.jpg`` file names are collected.
        split_ratio: Fraction of the files that goes to the training list.

    Returns:
        A ``(train_images, val_images)`` pair of file-name lists.
    """
    # os.listdir order is arbitrary; sorting first makes the shuffle
    # reproducible whenever the caller seeds the `random` module.
    all_images = sorted(f for f in os.listdir(image_dir) if f.endswith(".jpg"))
    random.shuffle(all_images)
    split_idx = int(len(all_images) * split_ratio)
    train_images = all_images[:split_idx]
    val_images = all_images[split_idx:]
    return train_images, val_images


# Dataset class for the Tuberculosis dataset
class TuberculosisDataset(torch.utils.data.Dataset):
    """Detection dataset pairing ``.jpg`` images with XML box annotations.

    Each item is an ``(image, target)`` pair, or ``None`` when the annotation
    file yields no valid box; the DataLoader's collate function is expected
    to filter the ``None`` entries out.
    """

    def __init__(self, image_dir, annotation_dir, image_list, transform=None):
        """Store the data locations.

        Args:
            image_dir: Directory holding the image files.
            annotation_dir: Directory holding the XML annotation files.
            image_list: Image file names that belong to this dataset.
            transform: Optional callable applied to the PIL image only.
        """
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        self.image_list = image_list
        self.transform = transform

    def __len__(self):
        """Return the number of images in this dataset."""
        return len(self.image_list)

    def __getitem__(self, idx):
        """Load sample ``idx``; returns ``None`` if it has no valid boxes."""
        image_name = self.image_list[idx]

        # Swap only the real extension; str.replace(".jpg", ".xml") would
        # also rewrite a ".jpg" occurring inside the file stem.
        xml_name = os.path.splitext(image_name)[0] + ".xml"
        boxes = parse_xml(os.path.join(self.annotation_dir, xml_name))

        # Check for empty bounding boxes and return None. Done before the
        # image is opened so unusable samples cost no image decode.
        if len(boxes) == 0:
            return None

        image = Image.open(os.path.join(self.image_dir, image_name)).convert("RGB")

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # Every box gets label 1 (single foreground class).
        labels = torch.ones((len(boxes),), dtype=torch.int64)
        iscrowd = torch.zeros((len(boxes),), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = torch.tensor([idx])
        target["iscrowd"] = iscrowd

        # Apply transformations (image only; boxes are left untouched).
        if self.transform:
            image = self.transform(image)

        return image, target


# Define the transformations using torchvision
# Image-only preprocessing pipeline used by both datasets.
# NOTE(review): torchvision detection models normalise their inputs
# internally and document a 0-1 input range, so the Normalize step here may
# normalise twice - confirm against the torchvision version in use. It is
# kept because the plotting code in this file inverts exactly these constants.
data_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),  # Convert PIL image to tensor
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize the images
])


# Adjusting the DataLoader collate function to handle None values
def collate_fn(batch):
    """Drop ``None`` samples and regroup the batch into per-field tuples.

    Args:
        batch: List of ``(image, target)`` pairs, possibly containing ``None``.

    Returns:
        ``(images, targets)`` as tuples; an empty tuple when every sample
        was ``None``.
    """
    batch = list(filter(lambda x: x is not None, batch))
    return tuple(zip(*batch))


def get_ssd_model_for_finetuning(num_classes):
    """Build an SSD300 detector with a VGG16 backbone.

    Args:
        num_classes: Number of output classes passed to ``ssd300_vgg16``.

    Returns:
        The constructed model.
    """
    # Load an SSD model with a VGG16 backbone without pre-trained detection weights.
    # NOTE(review): `pretrained=False` only concerns the detection weights;
    # depending on the torchvision version the VGG16 backbone may still be
    # initialised from ImageNet weights by default - confirm.
    model = ssd300_vgg16(pretrained=False, num_classes=num_classes)
    return model


# Function to save the model
def save_model(model, path="RetinaNet_mtb.pth", save_full_model=False):
    """Write the model to disk with ``torch.save``.

    Args:
        model: The model to persist.
        path: Destination file. NOTE(review): the default name mentions
            RetinaNet although this script trains an SSD model; it is kept
            unchanged so existing checkpoints keep their name.
        save_full_model: When true the whole model object is pickled,
            otherwise only ``model.state_dict()`` is stored.
    """
    if save_full_model:
        torch.save(model, path)
    else:
        torch.save(model.state_dict(), path)
    print(f"Model saved to {path}")


# Function to compute Intersection over Union
def compute_iou(boxA, boxB):
    """Return the Intersection over Union of two ``(x1, y1, x2, y2)`` boxes.

    Widths and heights are computed as ``x2 - x1 + 1``, i.e. both corner
    coordinates are treated as belonging to the box.

    Args:
        boxA: First box as an indexable of four numbers.
        boxB: Second box as an indexable of four numbers.

    Returns:
        The IoU as a float; ``0.0`` when the union area is not positive.
    """
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    union = float(boxAArea + boxBArea - interArea)
    # Degenerate (inverted) boxes can give a non-positive union; avoid
    # dividing by zero or returning a negative ratio.
    if union <= 0:
        return 0.0
    iou = interArea / union
    return iou


# Adjusting the DataLoader collate function to handle None values and entirely empty batches
def collate_fn(batch):
    """Drop ``None`` samples and regroup the batch into per-field tuples.

    Args:
        batch: List of ``(image, target)`` pairs, possibly containing ``None``.

    Returns:
        ``(images, targets)`` as tuples. When every sample was ``None`` a
        placeholder ``([zeros(1, 3, 224, 224)], [{}])`` is returned instead;
        consumers recognise it by the single empty target dict and skip it.
    """
    batch = list(filter(lambda x: x is not None, batch))
    if len(batch) == 0:
        # Return placeholder batch if entirely empty
        return [torch.zeros(1, 3, 224, 224)], [{}]
    return tuple(zip(*batch))


#Training function with modifications for collecting IoU and loss
def train_model(model, train_loader, optimizer, device, num_epochs=10):
    """Train the detector, track loss and IoU per epoch, plot and save.

    After each optimisation step the model is switched to eval mode to
    predict on the same batch; the IoU of every (predicted box, true box)
    pair is accumulated and averaged per epoch. When training finishes the
    loss/IoU curves are shown and ``save_model(model)`` is called.

    Args:
        model: Detection model returning a loss dict in train mode and a
            list of prediction dicts in eval mode.
        train_loader: Iterable of ``(images, targets)`` batches.
        optimizer: Optimizer over the model parameters.
        device: Device the model and batches are moved to.
        num_epochs: Number of passes over ``train_loader``.
    """
    model.train()
    model.to(device)

    loss_values = []
    iou_values = []

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        total_ious = 0
        num_boxes = 0
        num_batches = 0  # batches that actually contributed to epoch_loss

        for images, targets in train_loader:
            # Skip batches with placeholder data
            if len(targets) == 1 and not targets[0]:
                continue

            # Skip batches with empty targets
            if any(len(target["boxes"]) == 0 for target in targets):
                continue

            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            epoch_loss += losses.item()
            num_batches += 1

            # Compute IoU for evaluation
            with torch.no_grad():
                model.eval()
                predictions = model(images)
                for i, prediction in enumerate(predictions):
                    pred_boxes = prediction["boxes"].cpu().numpy()
                    true_boxes = targets[i]["boxes"].cpu().numpy()
                    for pred_box in pred_boxes:
                        for true_box in true_boxes:
                            iou = compute_iou(pred_box, true_box)
                            total_ious += iou
                            num_boxes += 1
            model.train()

        # Average over the batches that were really processed; dividing by
        # len(train_loader) understated the loss whenever batches were skipped.
        avg_loss = epoch_loss / num_batches if num_batches != 0 else 0.0
        avg_iou = total_ious / num_boxes if num_boxes != 0 else 0
        loss_values.append(avg_loss)
        iou_values.append(avg_iou)
        print(f"Epoch {epoch+1}/{num_epochs} Loss: {avg_loss} Avg IoU: {avg_iou}")

    # Plotting loss and IoU values
    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.plot(loss_values, label="Training Loss")
    plt.title("Training Loss across Epochs")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")

    plt.subplot(1, 2, 2)
    plt.plot(iou_values, label="IoU")
    plt.title("IoU across Epochs")
    plt.xlabel("Epochs")
    plt.ylabel("IoU")

    plt.show()

    # Save model after training
    save_model(model)


# Validation function
def validate_model(model, val_loader, device):
    """Run the model in eval mode over every validation batch.

    The predictions are not collected; this is only a forward-pass check.

    Args:
        model: Detection model to run.
        val_loader: Iterable of ``(images, targets)`` batches.
        device: Device the model and batches are moved to.
    """
    model.eval()
    model.to(device)

    with torch.no_grad():
        for images, targets in val_loader:
            # Skip the placeholder batch emitted by collate_fn when every
            # sample was None: its 4-D dummy tensor is not a valid image.
            if len(targets) == 1 and not targets[0]:
                continue

            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            model(images)


# Paths to your data
# Paths to your data (images and XML annotations live in the same folder).
image_dir = "tuberculosis-phonecamera"
annotation_dir = "tuberculosis-phonecamera"

# Split data
train_images, val_images = split_data(image_dir)

# Create datasets and dataloaders
train_dataset = TuberculosisDataset(image_dir, annotation_dir, train_images, transform=data_transform)
val_dataset = TuberculosisDataset(image_dir, annotation_dir, val_images, transform=data_transform)

# Updated DataLoader with new collate function
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

# Model and optimizer (2 classes are requested from the model builder).
model = get_ssd_model_for_finetuning(2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train and validate
train_model(model, train_loader, optimizer, device="cuda", num_epochs=10)
validate_model(model, val_loader, device="cuda")


#######################################Print Metrics######################################
def calculate_metrics(predictions, ground_truths, iou_threshold=0.5):
    """Compute precision, recall, F1 and mean IoU for detection results.

    Predictions are matched to ground-truth boxes one-to-one per image: a
    prediction is a true positive when its best still-unclaimed ground-truth
    box reaches ``iou_threshold``; otherwise it is a false positive.
    Ground-truth boxes left unclaimed are false negatives.

    Args:
        predictions: Sequence of dicts with a ``"boxes"`` tensor.
        ground_truths: Sequence of dicts with a ``"boxes"`` tensor, aligned
            with ``predictions``.
        iou_threshold: Minimum IoU for a match.

    Returns:
        ``(precision, recall, f1_score, mean_iou)`` where ``mean_iou`` is the
        average, over all predictions, of the best IoU against any
        ground-truth box of the same image.
    """
    TP = 0  # True Positives
    FP = 0  # False Positives
    FN = 0  # False Negatives
    total_iou = 0  # to calculate mean IoU

    for pred, gt in zip(predictions, ground_truths):
        pred_boxes = pred["boxes"].cpu().numpy()
        gt_boxes = gt["boxes"].cpu().numpy()

        # Tracks which ground-truth boxes are already matched, so duplicate
        # detections of one object are not all counted as true positives.
        claimed = [False] * len(gt_boxes)

        # Match predicted boxes to ground truth boxes
        for pred_box in pred_boxes:
            ious = [compute_iou(pred_box, gt_box) for gt_box in gt_boxes]
            total_iou += max(ious, default=0)

            best_idx = -1
            best_iou = 0
            for j, iou in enumerate(ious):
                if not claimed[j] and iou > best_iou:
                    best_idx = j
                    best_iou = iou

            if best_idx >= 0 and best_iou >= iou_threshold:
                claimed[best_idx] = True
                TP += 1
            else:
                FP += 1

        # Per-image misses; using the running TP total here could go negative.
        FN += claimed.count(False)

    precision = TP / (TP + FP) if (TP + FP) != 0 else 0
    recall = TP / (TP + FN) if (TP + FN) != 0 else 0
    f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) != 0 else 0
    mean_iou = total_iou / (TP + FP) if (TP + FP) != 0 else 0

    return precision, recall, f1_score, mean_iou


def evaluate_model(model, dataloader, device):
    """Predict on every batch of ``dataloader`` and score the results.

    Args:
        model: Detection model returning a list of prediction dicts in eval mode.
        dataloader: Iterable of ``(images, targets)`` batches.
        device: Device the model and images are moved to.

    Returns:
        ``(precision, recall, f1_score, mean_iou)`` from ``calculate_metrics``.
    """
    model.eval()
    model.to(device)

    all_predictions = []
    all_ground_truths = []

    with torch.no_grad():
        for images, targets in dataloader:
            # Skip the placeholder batch emitted by collate_fn when every
            # sample was None; its empty target has no "boxes" entry.
            if len(targets) == 1 and not targets[0]:
                continue

            images = [image.to(device) for image in images]
            predictions = model(images)

            all_predictions.extend(predictions)
            all_ground_truths.extend(targets)

    precision, recall, f1_score, mean_iou = calculate_metrics(all_predictions, all_ground_truths)

    return precision, recall, f1_score, mean_iou


train_precision, train_recall, train_f1, train_iou = evaluate_model(model, train_loader, "cuda")
val_precision, val_recall, val_f1, val_iou = evaluate_model(model, val_loader, "cuda")

print("Training Set Metrics:")
print(f"Precision: {train_precision:.4f}, Recall: {train_recall:.4f}, F1 Score: {train_f1:.4f}, Mean IoU: {train_iou:.4f}")

print("\nValidation Set Metrics:")
print(f"Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, F1 Score: {val_f1:.4f}, Mean IoU: {val_iou:.4f}")

#sheet
# Same numbers again as a text table. Column padding and the "+" corners were
# lost in the scraped source and are reconstructed so the columns line up.
header = "| Metric    | Training Set | Validation Set |"
divider = "+-----------+--------------+----------------+"

train_metrics = f"| Precision | {train_precision:<12.4f} | {val_precision:<14.4f} |"
recall_metrics = f"| Recall    | {train_recall:<12.4f} | {val_recall:<14.4f} |"
f1_metrics = f"| F1 Score  | {train_f1:<12.4f} | {val_f1:<14.4f} |"
iou_metrics = f"| Mean IoU  | {train_iou:<12.4f} | {val_iou:<14.4f} |"

print(header)
print(divider)
print(train_metrics)
print(recall_metrics)
print(f1_metrics)
print(iou_metrics)
print(divider)

#######################################Train Set######################################
import numpy as np
import matplotlib.pyplot as plt


def plot_predictions_on_image(model, dataset, device, title):
    """Show a random dataset image twice: with true boxes, then predicted ones.

    Args:
        model: Detection model, assumed to already live on ``device``.
        dataset: Dataset yielding ``(image_tensor, target)`` or ``None``;
            the image is assumed to be normalised with the ImageNet
            mean/std used elsewhere in this file.
        device: Device the image is moved to for inference.
        title: Prefix for both figure titles.

    Raises:
        ValueError: If the dataset is empty or no usable sample is found.
    """
    if len(dataset) == 0:
        raise ValueError("Cannot plot predictions for an empty dataset")

    # Select a random image from the dataset. The original lower bound of 50
    # is kept when possible; smaller datasets fall back to 0 because
    # np.random.randint raises when low >= high.
    low = 50 if len(dataset) > 50 else 0
    sample = None
    for _ in range(100):
        idx = np.random.randint(low, len(dataset))
        sample = dataset[idx]
        # The dataset returns None for images without valid boxes; redraw.
        if sample is not None:
            break
    if sample is None:
        raise ValueError("Could not draw a sample with bounding boxes")
    image, target = sample

    img_tensor = image.clone().detach().to(device).unsqueeze(0)

    # Use the model to make predictions
    model.eval()
    with torch.no_grad():
        prediction = model(img_tensor)

    # Inverse normalization for visualization
    inv_normalize = transforms.Normalize(
        mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225],
        std=[1/0.229, 1/0.224, 1/0.225]
    )
    image = inv_normalize(image)
    image = torch.clamp(image, 0, 1)
    image = F.to_pil_image(image)

    # Plot the image with ground truth boxes
    plt.figure(figsize=(10, 6))
    plt.title(title + " with Ground Truth Boxes")
    plt.imshow(image)
    ax = plt.gca()

    # Draw the ground truth boxes in blue
    for box in target["boxes"]:
        rect = plt.Rectangle((box[0], box[1]), box[2]-box[0], box[3]-box[1],
                             fill=False, color="blue", linewidth=2)
        ax.add_patch(rect)
    plt.show()

    # Plot the image with predicted boxes
    plt.figure(figsize=(10, 6))
    plt.title(title + " with Predicted Boxes")
    plt.imshow(image)
    ax = plt.gca()

    # Draw the predicted boxes in red
    for box in prediction[0]["boxes"].cpu():
        rect = plt.Rectangle((box[0], box[1]), box[2]-box[0], box[3]-box[1],
                             fill=False, color="red", linewidth=2)
        ax.add_patch(rect)
    plt.show()


# Call the function for a random image from the train dataset
plot_predictions_on_image(model, train_dataset, "cuda", "Selected from Training Set")

#######################################Val Set######################################

# Call the function for a random image from the validation dataset
plot_predictions_on_image(model, val_dataset, "cuda", "Selected from Validation Set")
需要从头训练的就不跑了。
结尾我开始摆烂了。
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mzph.cn/news/910414.shtml
如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!