PaddlePaddle 2.1.1 Code Specification
Last updated: 2023-01-18
MNIST image classification based on the PaddlePaddle 2.1.1 framework; the training dataset paddle_train_data.zip can be downloaded here.
Below is the training code for one hyperparameter combination in a hyperparameter search job. The code receives the values filled in on the platform through the argparse module, so keep your argument definitions consistent with it.
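For reference, the platform would launch a single trial roughly like the command below. The flag names must match the argparse definitions in the sample code; the values shown here are only illustrative.
Plain Text
python PaddlePaddle2.1.1_autosearch.py \
    --train_dir ./train_data --test_dir ./test_data --output_dir ./output \
    --job_id job-1234 --trial_id 0-0 --metric acc \
    --data_sampling_scale 1.0 --batch_size 128 --lr 0.01 --epoch 5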
Sample code: PaddlePaddle2.1.1_autosearch.py
Python
# -*- coding:utf-8 -*-
""" paddle train demo """
import argparse
import gzip
import os
import struct
import time

import numpy as np
import paddle
import paddle.fluid as fluid
from rudder_autosearch.sdk.amaas_tools import AMaasTools


def parse_arg():
    """Parse the arguments filled in on the platform and passed to this trial."""
    parser = argparse.ArgumentParser(description='paddle2.1.1 mnist Example')
    parser.add_argument('--train_dir', type=str, default='./train_data',
                        help='input data dir for training (default: ./train_data)')
    parser.add_argument('--test_dir', type=str, default='./test_data',
                        help='input data dir for test (default: ./test_data)')
    parser.add_argument('--output_dir', type=str, default='./output',
                        help='output dir for auto_search job (default: ./output)')
    parser.add_argument('--job_id', type=str, default="job-1234",
                        help='auto_search job id (default: "job-1234")')
    parser.add_argument('--trial_id', type=str, default="0-0",
                        help='auto_search id of a single trial (default: "0-0")')
    parser.add_argument('--metric', type=str, default="acc",
                        help='evaluation metric of the model')
    parser.add_argument('--data_sampling_scale', type=float, default=1.0,
                        help='sampling ratio of the data (default: 1.0)')
    parser.add_argument('--batch_size', type=int, default=64,
                        help='number of images input in an iteration (default: 64)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate of the training (default: 0.001)')
    parser.add_argument('--epoch', type=int, default=5,
                        help='number of epochs to train (default: 5)')
    args = parser.parse_args()
    # Each trial writes its outputs to <output_dir>/<job_id>/<trial_id>
    args.output_dir = os.path.join(args.output_dir, args.job_id, args.trial_id)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    print("job_id: {}, trial_id: {}".format(args.job_id, args.trial_id))
    return args


def load_data(file_dir, is_train=True):
    """Decompress and parse the MNIST idx files under file_dir.

    :param file_dir: directory containing the gzipped MNIST files
    :param is_train: load the training split if True, otherwise the test split
    :return: (images, labels) numpy arrays; images has shape (N, 784)
    """
    if is_train:
        image_path = file_dir + '/train-images-idx3-ubyte.gz'
        label_path = file_dir + '/train-labels-idx1-ubyte.gz'
    else:
        image_path = file_dir + '/t10k-images-idx3-ubyte.gz'
        label_path = file_dir + '/t10k-labels-idx1-ubyte.gz'
    # Decompress the .gz files next to the originals
    with open(image_path.replace('.gz', ''), 'wb') as out_f, gzip.GzipFile(image_path) as zip_f:
        out_f.write(zip_f.read())
    # os.unlink(image_path)
    with open(label_path.replace('.gz', ''), 'wb') as out_f, gzip.GzipFile(label_path) as zip_f:
        out_f.write(zip_f.read())
    # os.unlink(label_path)
    # Parse the idx headers, then read the raw label/pixel bytes
    with open(label_path[:-3], 'rb') as lbpath:
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)
    with open(image_path[:-3], 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784)
    return images, labels


def reader_creator(file_dir, is_train=True, buffer_size=100, data_sampling_scale=1):
    """Build a sample-level reader over the MNIST data.

    :param file_dir: directory containing the MNIST files
    :param is_train: read the training split if True, otherwise the test split
    :param buffer_size: number of samples read per chunk
    :param data_sampling_scale: fraction of the training data to keep
    :return: a generator function yielding (image, label) pairs
    """
    images, labels = load_data(file_dir, is_train)
    if is_train:
        # Subsample the training data with a fixed seed so every trial sees the same subset
        np.random.seed(0)
        sample_data_num = int(data_sampling_scale * len(images))
        idx = np.arange(len(images))
        np.random.shuffle(idx)
        images, labels = images[idx[:sample_data_num]], labels[idx[:sample_data_num]]

    def reader():
        """Yield one (image, label) sample at a time."""
        for num in range(int(len(labels) / buffer_size)):
            for i in range(buffer_size):
                yield images[num * buffer_size + i, :], int(labels[num * buffer_size + i])
    return reader


def reader_load(args):
    """Build the batched train/test readers."""
    # Read 500 training samples at a time into a shuffle buffer; the batched
    # reader then yields args.batch_size samples per batch
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            reader_creator(args.train_dir, is_train=True, buffer_size=100,
                           data_sampling_scale=args.data_sampling_scale), buf_size=500),
        batch_size=args.batch_size)
    # Read the test set, args.batch_size samples per batch
    test_reader = paddle.batch(
        reader_creator(args.test_dir, is_train=False, buffer_size=100), batch_size=args.batch_size)
    return train_reader, test_reader


def softmax_regression():
    """Define a softmax classifier:
    a single fully connected layer with softmax activation.
    Return:
        predict -- the classification result
    """
    # Raw input image data, of size 28*28*1
    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    # Fully connected layer with softmax activation; the output size must be 10,
    # one unit per digit class
    predict = fluid.layers.fc(input=img, size=10, act='softmax')
    return predict


def multilayer_perceptron():
    """Define a multilayer perceptron classifier:
    two hidden (fully connected) layers with ReLU activation
    and an output layer with softmax activation.
    Return:
        prediction -- the classification result
    """
    # Raw input image data, of size 28*28*1
    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    # First fully connected layer, with ReLU activation
    hidden = fluid.layers.fc(input=img, size=200, act='relu')
    # Second fully connected layer, with ReLU activation
    hidden = fluid.layers.fc(input=hidden, size=200, act='relu')
    # Fully connected output layer with softmax activation; the output size must be 10
    prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
    return prediction


def convolutional_neural_network():
    """Define a convolutional neural network classifier:
    the input image passes through two conv-pool blocks, then a fully
    connected output layer with softmax activation.
    Return:
        prediction -- the classification result
    """
    # Raw input image data, of size 28*28*1
    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    # First conv-pool block:
    # 20 5*5 filters, pool size 2, pool stride 2, ReLU activation
    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
        pool_size=2,
        pool_stride=2,
        act="relu")
    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
    # Second conv-pool block:
    # 50 5*5 filters, pool size 2, pool stride 2, ReLU activation
    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
        pool_size=2,
        pool_stride=2,
        act="relu")
    # Fully connected output layer with softmax activation; the output size must be 10
    prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
    return prediction


def train_program():
    """Build the training program.
    Return:
        predict -- the classification result
        avg_cost -- the mean loss
        acc -- the classification accuracy
    """
    paddle.enable_static()
    # Label layer, named 'label', holding the class label of the input image
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # predict = softmax_regression()  # uncomment to use softmax regression
    # predict = multilayer_perceptron()  # uncomment to use the multilayer perceptron
    predict = convolutional_neural_network()  # use the LeNet-5-style CNN
    # Cross-entropy loss between predict and label
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    # Mean loss
    avg_cost = fluid.layers.mean(cost)
    # Classification accuracy
    acc = fluid.layers.accuracy(input=predict, label=label)
    return predict, [avg_cost, acc]


def optimizer_program():
    """Return an Adam optimizer (unused here; create_model builds its own with args.lr)."""
    return fluid.optimizer.Adam(learning_rate=0.001)


def event_handler(pass_id, batch_id, cost):
    """Print intermediate training results: epoch, batch and loss."""
    print("Pass %d, Batch %d, Cost %f" % (pass_id, batch_id, cost))


def train_test(train_test_program,
               train_test_feed, train_test_reader, executor, fetch_list):
    """Run the test program over the whole test reader and average the metrics."""
    # Per-batch accuracy is collected in acc_set
    acc_set = []
    # Per-batch mean loss is collected in avg_loss_set
    avg_loss_set = []
    # Feed every batch yielded by the test reader into the network
    for test_data in train_test_reader():
        avg_loss_np, acc_np = executor.run(
            program=train_test_program,
            feed=train_test_feed.feed(test_data),
            fetch_list=fetch_list)
        acc_set.append(float(acc_np))
        avg_loss_set.append(float(avg_loss_np))
    # Accuracy and loss over the test data
    acc_val_mean = np.array(acc_set).mean()
    avg_loss_val_mean = np.array(avg_loss_set).mean()
    # Return the mean loss and the mean accuracy
    return avg_loss_val_mean, acc_val_mean


class Model(object):
    """Wrap model construction, training, saving and evaluation."""

    def __init__(self, args, train_reader, test_reader):
        self.args = args
        self.create_model()
        self.train_reader = train_reader
        self.test_reader = test_reader

    def create_model(self):
        """create_model"""
        # The model runs on a single CPU
        self.place = fluid.CPUPlace()
        # Call train_program to get the prediction and the loss
        self.prediction, [self.avg_loss, self.acc] = train_program()
        # Raw input image data, of size 28*28*1
        img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
        # Label layer, named 'label', holding the class label of the input image
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        # Tell the network that each fed sample has two parts: the img value and the label value
        self.feeder = fluid.DataFeeder(feed_list=[img, label], place=self.place)
        # Use the Adam optimizer with the searched learning rate
        optimizer = fluid.optimizer.Adam(learning_rate=self.args.lr)
        optimizer.minimize(self.avg_loss)

    def run_train(self):
        """Train for args.epoch epochs over the batched train reader."""
        self.exe = fluid.Executor(self.place)
        self.exe.run(fluid.default_startup_program())
        main_program = fluid.default_main_program()
        step = 0
        for epoch_id in range(self.args.epoch):
            print("Epoch %d:" % (epoch_id))
            for step_id, data in enumerate(self.train_reader()):
                metrics = self.exe.run(main_program,
                                       feed=self.feeder.feed(data),
                                       fetch_list=[self.avg_loss, self.acc])
                if step % 100 == 0:  # print the loss once every 100 training steps
                    event_handler(epoch_id, step, metrics[0])
                step += 1

    def save_model(self):
        """save_model"""
        # Store the inference model under the directory save_dirname
        save_dirname = self.args.output_dir
        fluid.io.save_inference_model(save_dirname,
                                      ["img"], [self.prediction], self.exe,
                                      model_filename='model',
                                      params_filename='params')

    def evaluate(self):
        """evaluate"""
        test_program = fluid.default_main_program().clone(for_test=True)
        avg_loss_val, acc_val = train_test(train_test_program=test_program,
                                           train_test_reader=self.test_reader,
                                           train_test_feed=self.feeder,
                                           executor=self.exe,
                                           fetch_list=[self.avg_loss, self.acc])
        print("accuracy: %f" % acc_val)
        return acc_val


def report_final(args, metric):
    """Report the final result to the platform through the SDK."""
    amaas_tools = AMaasTools(args.job_id, args.trial_id)
    metric_dict = {args.metric: metric}
    # Retry the report up to 3 times
    for i in range(3):
        flag, ret_msg = amaas_tools.report_final_result(metric=metric_dict,
                                                        export_model_path=args.output_dir,
                                                        checkpoint_path="")
        print("End Report, metric:{}, ret_msg:{}".format(metric, ret_msg))
        if flag:
            break
        time.sleep(1)
    assert flag, ("Failed to report the final result to the manager! "
                  "Please check that the manager's address and status are OK!")


def main():
    """main"""
    # Parse the arguments
    args = parse_arg()
    # Load the datasets
    train_reader, test_reader = reader_load(args)
    # Define the model
    model = Model(args, train_reader, test_reader)
    # Train the model
    model.run_train()
    # Save the model
    model.save_model()
    # Evaluate the model
    acc = model.evaluate()
    # Report the result
    report_final(args, metric=acc)


if __name__ == '__main__':
    main()
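To sanity-check what save_model() exports, a minimal sketch like the one below loads the model back with fluid.io.load_inference_model and runs a prediction. The trial output directory and the random input image are assumptions for illustration only.
Python
# -*- coding:utf-8 -*-
"""Minimal sketch: load the exported inference model and predict one image."""
import numpy as np
import paddle
import paddle.fluid as fluid

paddle.enable_static()
place = fluid.CPUPlace()
exe = fluid.Executor(place)
# Hypothetical trial output directory written by save_model()
save_dirname = './output/job-1234/0-0'
infer_program, feed_names, fetch_targets = fluid.io.load_inference_model(
    save_dirname, exe, model_filename='model', params_filename='params')
# One fake 28*28 grayscale image in place of a real test sample
img = np.random.rand(1, 1, 28, 28).astype('float32')
results = exe.run(infer_program,
                  feed={feed_names[0]: img},
                  fetch_list=fetch_targets)
print("predicted digit:", results[0].argmax(axis=1))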
The YAML configuration corresponding to the sample code is shown below; keep the format consistent.
Sample content: pwo_search_demo_for_paddle.yml
Plain Text
# search algorithm parameters
search_strategy:
  algo: PARTICLE_SEARCH         # search strategy: particle swarm optimization
  params:
    population_num: 8           # number of individuals in the swarm | [1,10], int
    round: 10                   # number of iterations | [5,50], int
    inertia_weight: 0.5         # inertia weight | (0,1], float
    global_acceleration: 1.5    # global acceleration | (0,4], float
    local_acceleration: 1.5     # individual acceleration | (0,4], float

# data sampling ratio for a single trial, in %
data_sampling_scale: 100        # (0,100], int

# evaluation metric parameters
metrics:
  name: acc                     # evaluation metric | any string, str
  goal: MAXIMIZE                # str, must be MAXIMIZE or MINIMIZE (uppercase)
  expected_value: 100           # early-stopping threshold in %: the whole search ends once the metric exceeds it | unrestricted, int

# search space
search_space:
  batch_size:
    htype: choice
    value: [64, 128, 256, 512]
  lr:
    htype: loguniform
    value: [0.0001, 0.1]
  epoch:
    htype: choice
    value: [1, 5, 10]
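As a rough illustration of the search_space semantics: a choice parameter is drawn from the listed values, while a loguniform parameter is sampled log-uniformly between its two bounds. The sketch below mimics that behavior with numpy; it is not the platform's actual sampler.
Python
import numpy as np

rng = np.random.default_rng(0)

def sample(space):
    """Draw one hyperparameter combination from a search_space-like dict."""
    params = {}
    for name, spec in space.items():
        if spec['htype'] == 'choice':
            # Pick one of the enumerated values
            params[name] = rng.choice(spec['value']).item()
        elif spec['htype'] == 'loguniform':
            # Sample uniformly in log space between the two bounds
            low, high = spec['value']
            params[name] = float(np.exp(rng.uniform(np.log(low), np.log(high))))
    return params

space = {
    'batch_size': {'htype': 'choice', 'value': [64, 128, 256, 512]},
    'lr': {'htype': 'loguniform', 'value': [0.0001, 0.1]},
    'epoch': {'htype': 'choice', 'value': [1, 5, 10]},
}
print(sample(space))  # e.g. {'batch_size': 256, 'lr': 0.00035, 'epoch': 5}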