# 用paddle库写的,目前效果不太好
# model是resnet18,这几天应该调这个

import os
import datetime
import random

import paddle
import numpy as np
import pandas
from paddle.dataset.image import cv2

from paddle.vision.models import resnet18

import paddle.nn.functional as F

train_dir = "./data/train"


# Image preprocessing
def transform_img(img):
    """Resize an image to 224x224 and return it as a normalized CHW array.

    The input is resized with ``cv2.resize`` and then treated as an
    [H, W, C] array. The result is a float32 array laid out as [C, H, W];
    for pixel values in [0, 255] the output lies in [-1.0, 1.0].
    """
    resized = cv2.resize(img, (224, 224))
    # Move channels to the front and cast first so the arithmetic below
    # is carried out in float32.
    chw = resized.transpose(2, 0, 1).astype('float32')
    # x / 255 maps to [0, 1]; * 2 - 1 then maps to [-1, 1].
    return chw / 255. * 2.0 - 1.0


# Build the mini-batch reader for a directory of labelled images
def data_loader(datadir, batch_size=10, mode='train'):
    """Create a reader that yields mini-batches of images from ``datadir``.

    Labels are derived from the first character of each file name:
    ``'c'`` maps to 0 and ``'d'`` maps to 1.

    Args:
        datadir: Directory to read; every entry returned by ``os.listdir``
            is treated as an image file.
        batch_size: Number of samples per yielded mini-batch. The final
            batch holds whatever samples are left and may be smaller.
        mode: When equal to ``'train'``, the file order is reshuffled each
            time the reader is started.

    Returns:
        A zero-argument callable. Each call returns a generator of
        ``(imgs_array, labels_array)`` tuples: ``imgs_array`` is a float32
        array stacking the outputs of ``transform_img`` and
        ``labels_array`` is an integer array of shape ``(n, 1)``.

    Raises:
        ValueError: While iterating, if a file name starts with neither
            ``'c'`` nor ``'d'``.
        OSError: While iterating, if ``cv2.imread`` cannot load a file.
    """
    # The directory is listed once, when the loader is created.
    filenames = os.listdir(datadir)

    def pack(imgs, labels):
        # Stack the accumulated samples into one (images, labels) batch.
        imgs_array = np.array(imgs).astype('float32')
        labels_array = np.array(labels).reshape(-1, 1)
        return imgs_array, labels_array

    def reader():
        if mode == 'train':
            # Shuffle in place so every epoch sees a different order.
            random.shuffle(filenames)
        batch_imgs = []
        batch_labels = []
        for name in filenames:
            # Validate the name before touching the file so that a stray
            # entry fails fast with a real exception (raising a plain
            # string is itself a TypeError in Python 3).
            if name.startswith('c'):
                label = 0
            elif name.startswith('d'):
                label = 1
            else:
                raise ValueError('Unexpected file name: {!r}'.format(name))

            filepath = os.path.join(datadir, name)
            img = cv2.imread(filepath)
            if img is None:
                # cv2.imread signals failure by returning None, not raising.
                raise OSError('Failed to read image: {}'.format(filepath))

            batch_imgs.append(transform_img(img))
            batch_labels.append(label)
            if len(batch_imgs) == batch_size:
                # A full mini-batch is ready: emit it and start a new one.
                yield pack(batch_imgs, batch_labels)
                batch_imgs = []
                batch_labels = []

        if batch_imgs:
            # Emit the leftover samples as a final, smaller mini-batch.
            yield pack(batch_imgs, batch_labels)

    return reader


# Module-level reader over train_dir with a batch size of 5; calling
# train_loader() creates the batch generator that is bound to data_reader.
# NOTE(review): neither name appears to be used elsewhere in this file
# (Runner.train_pm builds its own loader) — confirm before removing.
train_loader = data_loader(train_dir, batch_size=5, mode='train')
data_reader = train_loader()


class Runner(object):
    """Bundle a model, an optimizer and a loss function for training,
    evaluation, prediction and checkpointing.
    """

    def __init__(self, model, optimizer, loss_fn):
        """Store the collaborators and reset the best-accuracy tracker.

        Args:
            model: Callable network exposing ``train()``, ``eval()``,
                ``state_dict()`` and ``set_state_dict()``.
            optimizer: Optimizer exposing ``step()``, ``clear_grad()`` and
                ``state_dict()``.
            loss_fn: Callable invoked as ``loss_fn(logits, label)``.
        """
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn

        # Best accuracy observed so far across epochs.
        self.best_acc = 0

    # Training loop
    def train_pm(self, train_datadir, **kwargs):
        """Train the model and checkpoint it whenever accuracy improves.

        Args:
            train_datadir: Directory handed to ``data_loader``.
            **kwargs: ``num_epochs`` (default 0) sets the number of passes
                over the data; ``save_path`` (default
                ``"/home/aistudio/output/"``) is the directory that
                ``save_model`` writes to.
        """
        print('start training ... ')
        self.model.train()

        num_epochs = kwargs.get('num_epochs', 0)
        save_path = kwargs.get("save_path", "/home/aistudio/output/")

        # Reader over the training data; restarted once per epoch.
        train_loader = data_loader(train_datadir, batch_size=5, mode='train')

        for epoch in range(num_epochs):
            for batch_id, (x_data, y_data) in enumerate(train_loader()):
                img = paddle.to_tensor(x_data)
                label = paddle.to_tensor(y_data)
                # Forward pass.
                logits = self.model(img)
                avg_loss = self.loss_fn(logits, label)

                if batch_id % 20 == 0:
                    print("epoch: {}, batch_id: {}, loss is: {:.4f}".format(epoch, batch_id, float(avg_loss.numpy())))
                # Backward pass, parameter update, then reset gradients.
                avg_loss.backward()
                self.optimizer.step()
                self.optimizer.clear_grad()

            # NOTE: accuracy is measured on the same loader used for
            # training; this file defines no separate validation split.
            acc = self.evaluate_pm(train_loader)
            # evaluate_pm leaves the model in eval mode; switch back.
            self.model.train()
            if acc > self.best_acc:
                self.save_model(save_path)
                self.best_acc = acc

    # Evaluation runs under paddle.no_grad() so no gradients are tracked.
    @paddle.no_grad()
    def evaluate_pm(self, train_loader):
        """Evaluate the model over every batch produced by ``train_loader``.

        Args:
            train_loader: Zero-argument callable returning an iterable of
                ``(images, labels)`` batches.

        Returns:
            The mean of the per-batch accuracies.
        """
        self.model.eval()
        accuracies = []
        losses = []

        for x_data, y_data in train_loader():
            img = paddle.to_tensor(x_data)
            label = paddle.to_tensor(y_data)
            # Forward pass.
            logits = self.model(img)
            # The loss takes raw logits, exactly as in train_pm. The loss
            # used by this script (F.cross_entropy) applies softmax
            # internally, so passing probabilities would apply it twice.
            loss = self.loss_fn(logits, label)
            # Class probabilities are only needed for the accuracy metric.
            pred = F.softmax(logits)
            acc = paddle.metric.accuracy(pred, label)
            accuracies.append(acc.numpy())
            losses.append(loss.numpy())
        print("[validation] accuracy/loss: {:.4f}/{:.4f}".format(np.mean(accuracies), np.mean(losses)))
        return np.mean(accuracies)

    @paddle.no_grad()
    def predict_pm(self, x, **kwargs):
        """Run a forward pass in eval mode and return the raw logits.

        Args:
            x: Input passed directly to the model.
            **kwargs: Accepted but not used.
        """
        # Switch to evaluation mode before inference.
        self.model.eval()
        return self.model(x)

    def save_model(self, save_path):
        """Save model and optimizer state under the directory ``save_path``.

        The files are named ``palm.pdparams`` and ``palm.pdopt``. The paths
        are built with ``os.path.join`` so ``save_path`` works with or
        without a trailing separator.
        """
        paddle.save(self.model.state_dict(), os.path.join(save_path, 'palm.pdparams'))
        paddle.save(self.optimizer.state_dict(), os.path.join(save_path, 'palm.pdopt'))

    def load_model(self, model_path):
        """Load model parameters from ``model_path`` into the model.

        Only the model state is restored; optimizer state is not loaded.
        """
        model_state_dict = paddle.load(model_path)
        self.model.set_state_dict(model_state_dict)

# Run inference over the test set and write the results to a CSV file
def predict_list(model_path='./palm.pdparams', test_dir='./data/test',
                 submission_csv='./data/sample_submission.csv',
                 num_images=12500):
    """Predict every test image and save the scores as a timestamped CSV.

    Images are expected at ``<test_dir>/<n>.jpg`` for ``n`` in
    ``1..num_images``. The score for image ``n`` is written to the
    ``label`` column at row index ``n - 1`` of the table loaded from
    ``submission_csv``. The result is saved in the current working
    directory as ``<YYYY-mm-dd-HH_MM_SS>.csv``.

    Args:
        model_path: Checkpoint passed to the module-level
            ``runner.load_model``.
        test_dir: Directory containing the numbered ``.jpg`` test images.
        submission_csv: CSV template to fill in.
        num_images: Number of test images to predict.

    Raises:
        OSError: If ``cv2.imread`` cannot load a test image.
    """
    runner.load_model(model_path)

    def predict(image_id):
        # Return the softmax probability of class index 1 for one image.
        image_path = os.path.join(test_dir, str(image_id) + ".jpg")

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None, not raising.
            raise OSError('Failed to read image: {}'.format(image_path))

        # Preprocess and add a leading batch dimension of size 1.
        batch = paddle.unsqueeze(paddle.to_tensor(transform_img(img)), axis=0)

        logits = runner.predict_pm(batch)
        probs = F.softmax(logits)

        return probs[0][1].item()

    df = pandas.read_csv(submission_csv)

    for now_id in range(1, num_images + 1):
        if now_id % 100 == 0:
            # Progress indicator every 100 images.
            print(str(now_id) + "...\n")
        df.at[now_id - 1, 'label'] = predict(now_id)

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H_%M_%S')
    df.to_csv(timestamp + '.csv', index=False)


# Loss function shared by training and evaluation.
loss_fn = F.cross_entropy

# Select the device (GPU 0 or CPU) before the model is built, so the global
# device setting is already in effect when the parameters are created.
use_gpu = True
paddle.device.set_device('gpu:0' if use_gpu else 'cpu')

# NOTE(review): resnet18() is built with its default output size while
# data_loader only emits labels 0 and 1 — consider num_classes=2 (this
# would invalidate previously saved checkpoints); confirm before changing.
model = resnet18()

# Optimizer
opt = paddle.optimizer.Adam(learning_rate=0.005, parameters=model.parameters())
runner = Runner(model, opt, loss_fn)

# Start training
runner.train_pm(train_dir, num_epochs=4, save_path='./')

# Run prediction on the test set
# predict_list()