Table of Contents
- 1. Acknowledgements
- 2. Project Structure
- 3. File Contents
  - main.py
  - model.py
  - dataset.py
  - utils.py
- 4. Q&A
1. Acknowledgements
Many thanks to the authors of the following materials:
[Reference 1] PyTorch crash-course tutorial (by Sung Kim)
2. Project Structure
├── main.py     # training (train), validation, and testing (test)
├── model.py    # the model definition
├── dataset.py  # data loading
└── utils.py    # common utility functions
3. File Contents
main.py
import torch
from torch.utils.data import DataLoader

import utils
from dataset import MyDataset
from model import Model

# load data
dataset = MyDataset()
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True, num_workers=2)

# model
model = Model()

# define loss and optimizer
criterion = torch.nn.BCELoss()                            # mean binary cross-entropy
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# train
for epoch in range(2):
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data
        # forward pass
        y_pred = model(inputs)
        # compute and print loss and accuracy
        loss = criterion(y_pred, labels)
        accuracy = utils.accuracy(y_pred, labels)
        print("[epoch {:d} | batch {:05d}] train_loss: {:.4f} accuracy: {:.4f}".format(
            epoch, i, loss.item(), accuracy.item()))
        # update
        optimizer.zero_grad()   # zero gradients
        loss.backward()         # perform a backward pass
        optimizer.step()        # update weights / parameters
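main.py is described above as covering training, validation, and testing, but only the training loop is shown. Below is a minimal evaluation sketch (not part of the original code), assuming a hypothetical val_loader built the same way as train_loader:
# Minimal evaluation sketch (assumption: a val_loader exists, built like train_loader).
model.eval()                          # switch to evaluation mode
correct, total = 0, 0
with torch.no_grad():                 # no gradients needed for validation/testing
    for inputs, labels in val_loader:
        y_pred = model(inputs)
        preds = (y_pred > 0.5).float()        # threshold the sigmoid output
        correct += (preds == labels).sum().item()
        total += labels.size(0)
print("val accuracy: {:.4f}".format(correct / total))
model.train()                         # switch back to training mode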
model.py
import torch

class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.l1 = torch.nn.Linear(8, 6)
        self.l2 = torch.nn.Linear(6, 4)
        self.l3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    # data flow
    def forward(self, x):
        out1 = self.sigmoid(self.l1(x))
        out2 = self.sigmoid(self.l2(out1))
        y_pred = self.sigmoid(self.l3(out2))
        return y_pred
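As a quick sanity check (a sketch, not part of the original files): the network should map an (N, 8) feature batch to an (N, 1) batch of sigmoid probabilities.
# Sketch: quick shape check of the model (assumes model.py is importable).
import torch
from model import Model

net = Model()
x = torch.randn(4, 8)                                # 4 samples, 8 features each
y = net(x)
print(y.shape)                                       # torch.Size([4, 1])
print(float(y.min()) >= 0.0, float(y.max()) <= 1.0)  # True True: sigmoid output lies in [0, 1]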
dataset.py
Key points:
(1) __getitem__ and __len__ must be overridden;
(2) __getitem__ returns a single sample rather than a batch; batching is handled by the DataLoader (see the Q&A in Section 4).
import numpy as np
import torch
from torch.utils.data import Dataset

class MyDataset(Dataset):
    def __init__(self):
        # Initialize your data, download, etc.
        xy = np.loadtxt('./data/diabetes.csv.gz', delimiter=',', dtype=np.float32)
        self.len = xy.shape[0]
        self.x_data = torch.from_numpy(xy[:, 0:-1])   # all feature columns
        self.y_data = torch.from_numpy(xy[:, [-1]])   # last column is the label

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len
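If a separate validation split is wanted (the description of main.py mentions validation), one option, shown here only as a sketch using torch.utils.data.random_split and not part of the original code, is:
# Sketch: split MyDataset into train/validation subsets (not in the original files).
from torch.utils.data import DataLoader, random_split
from dataset import MyDataset

dataset = MyDataset()
n_val = len(dataset) // 5                       # hold out 20% for validation
n_train = len(dataset) - n_val
train_set, val_set = random_split(dataset, [n_train, n_val])
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_set, batch_size=32, shuffle=False)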
utils.py
import numpy as np
import scipy.sparse as sp
import torch
import os

def encode_onehot(labels):
    """Map a list of labels to one-hot rows, one row per input label."""
    classes = set(labels)
    classes_dict = {c: np.identity(len(classes))[i, :]
                    for i, c in enumerate(classes)}
    labels_onehot = np.array(list(map(classes_dict.get, labels)), dtype=np.int32)
    return labels_onehot

def accuracy(output, labels):
    """Classification accuracy for a batch of predictions."""
    if output.size(-1) == 1:
        # binary case (used by main.py): threshold the single sigmoid output at 0.5
        preds = (output > 0.5).type_as(labels)
    else:
        # multi-class case: take the arg-max class per row
        preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double().sum()
    return correct / len(labels)

def list_all_files(rootdir):
    """Recursively list all files under rootdir."""
    _files = []
    for name in os.listdir(rootdir):
        path = os.path.join(rootdir, name)
        if os.path.isdir(path):
            # directory: recurse into it
            _files.extend(list_all_files(path))
        if os.path.isfile(path):
            _files.append(path)
    return _files

def mkdir(path):
    """Create a directory if it does not exist; return True if it was created."""
    path = path.strip()          # strip leading/trailing whitespace
    path = path.rstrip("\\")     # strip a trailing backslash
    if not os.path.exists(path):
        os.makedirs(path)
        print(path + ' created successfully')
        return True
    else:
        print(path + ' already exists!')
        return False
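The two helpers used most directly by this project can be exercised on toy data; the snippet below is only an illustrative sketch (the multi-class logits and integer labels are made-up inputs, not taken from main.py):
# Sketch: exercising the utils helpers on toy data.
import torch
import utils

print(utils.encode_onehot(['cat', 'dog', 'cat']))              # 3 one-hot rows, one per label
logits = torch.tensor([[2.0, 0.1], [0.2, 1.5], [3.0, 0.3]])    # (N, C) class scores
labels = torch.tensor([0, 1, 1])
print(utils.accuracy(logits, labels))                          # ~0.6667 (2 of 3 correct)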
4. Q&A
Q: Does __getitem__ in dataset.py return a single element or a batch of data?
A: A single sample. __getitem__(index) returns one (x, y) pair; the DataLoader collates batch_size of these samples (32 in main.py) into a batch.
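A quick way to verify this (a sketch built on the classes above; the printed shapes assume the 8-feature diabetes data):
# Sketch: __getitem__ yields one sample; the DataLoader stacks samples into a batch.
from torch.utils.data import DataLoader
from dataset import MyDataset

dataset = MyDataset()
x, y = dataset[0]                 # a single sample
print(x.shape, y.shape)           # torch.Size([8]) torch.Size([1])

loader = DataLoader(dataset, batch_size=32, shuffle=True)
xb, yb = next(iter(loader))       # one batch assembled by the DataLoader
print(xb.shape, yb.shape)         # torch.Size([32, 8]) torch.Size([32, 1])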