Tensorflow 2.3.0代码规范
更新时间:2023-01-18
Tensorflow 2.3.0代码规范
基于Tensorflow2.3.0框架的MNIST图像分类,训练数据集tf_train_data2.zip点击这里下载。
如下所示是其超参搜索任务中一个超参数组合的训练代码,代码会通过argparse模块接受在平台中填写的信息,请保持一致。
tensorflow2.3_autosearch.py示例代码
Python
1# -*- coding:utf-8 -*-
2""" tensorflow2 train demo """
3import tensorflow as tf
4import os
5import numpy as np
6import time
7import argparse
8from rudder_autosearch.sdk.amaas_tools import AMaasTools
9
def parse_arg():
    """Parse command-line arguments for one auto-search training trial.

    Returns:
        argparse.Namespace: parsed arguments. ``output_dir`` is rewritten to
        ``<output_dir>/<job_id>/<trial_id>`` and the directory is created if
        it does not already exist.
    """
    parser = argparse.ArgumentParser(description='tensorflow2.3 mnist Example')
    parser.add_argument('--train_dir', type=str, default='./train_data',
                        help='input data dir for training (default: ./train_data)')
    parser.add_argument('--test_dir', type=str, default='./test_data',
                        help='input data dir for test (default: ./test_data)')
    parser.add_argument('--output_dir', type=str, default='./output',
                        help='output dir for auto_search job (default: ./output)')
    parser.add_argument('--job_id', type=str, default="job-1234",
                        help='auto_search job id (default: "job-1234")')
    parser.add_argument('--trial_id', type=str, default="0-0",
                        help='auto_search id of a single trial (default: "0-0")')
    parser.add_argument('--metric', type=str, default="acc",
                        help='evaluation metric of the model')
    parser.add_argument('--data_sampling_scale', type=float, default=1.0,
                        help='sampling ratio of the data (default: 1.0)')
    parser.add_argument('--batch_size', type=int, default=100,
                        help='number of images input in an iteration (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate of the training (default: 0.001)')
    parser.add_argument('--epoch', type=int, default=5,
                        help='number of epochs to train (default: 5)')
    args = parser.parse_args()
    # Each trial writes into its own sub-directory: <output>/<job_id>/<trial_id>.
    args.output_dir = os.path.join(args.output_dir, args.job_id, args.trial_id)
    # exist_ok=True avoids the check-then-create race of exists() + makedirs().
    os.makedirs(args.output_dir, exist_ok=True)
    print("job_id: {}, trial_id: {}".format(args.job_id, args.trial_id))
    return args
39
def load_data(data_sampling_scale):
    """Load MNIST and randomly sample a fraction of the training split.

    Args:
        data_sampling_scale (float): fraction of training examples to keep,
            expected in (0, 1].

    Returns:
        tuple: ((x_train, x_test), (y_train, y_test)) with pixel values
        scaled into [0, 1].
    """
    mnist = tf.keras.datasets.mnist
    work_path = os.getcwd()
    (x_train, y_train), (x_test, y_test) = mnist.load_data('%s/train_data/mnist.npz' % work_path)
    # Randomly sample the training data; the seed keeps the subset identical
    # across trials so hyper-parameter comparisons are apples-to-apples.
    np.random.seed(0)
    sample_data_num = int(data_sampling_scale * len(x_train))
    idx = np.arange(len(x_train))
    np.random.shuffle(idx)
    # Bug fix: index with the shuffled permutation. The original sliced
    # x_train[0:sample_data_num] directly, taking the first N examples in
    # dataset order and leaving the shuffle as dead code.
    sample_idx = idx[:sample_data_num]
    x_train, y_train = x_train[sample_idx], y_train[sample_idx]
    x_train, x_test = x_train / 255.0, x_test / 255.0
    return (x_train, x_test), (y_train, y_test)
53
def Model(learning_rate):
    """Build and compile a small dense classifier for 28x28 MNIST images.

    Args:
        learning_rate (float): learning rate for the Adam optimizer.

    Returns:
        A compiled tf.keras Sequential model (sparse categorical
        cross-entropy loss, accuracy metric).
    """
    network = tf.keras.models.Sequential()
    network.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
    network.add(tf.keras.layers.Dense(128, activation='relu'))
    network.add(tf.keras.layers.Dropout(0.2))
    network.add(tf.keras.layers.Dense(10, activation='softmax'))
    optimizer = tf.keras.optimizers.Adam(learning_rate)
    network.compile(optimizer=optimizer,
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])
    return network
66
def evaluate(model, x_test, y_test):
    """Evaluate *model* on the test set, print and return its accuracy."""
    _, accuracy = model.evaluate(x_test, y_test, verbose=2)
    print("accuracy: %f" % accuracy)
    return accuracy
72
def report_final(args, metric):
    """Report the trial's final metric to the auto-search manager.

    Retries up to 3 times, sleeping 1s between failed attempts.

    Args:
        args: parsed CLI arguments (uses job_id, trial_id, metric, output_dir).
        metric (float): final value of the evaluation metric ``args.metric``.

    Raises:
        RuntimeError: when every report attempt is rejected by the manager.
    """
    amaas_tools = AMaasTools(args.job_id, args.trial_id)
    metric_dict = {args.metric: metric}
    flag = False
    for _ in range(3):
        flag, ret_msg = amaas_tools.report_final_result(metric=metric_dict,
                                                        export_model_path=args.output_dir,
                                                        checkpoint_path="")
        print("End Report, metric:{}, ret_msg:{}".format(metric, ret_msg))
        if flag:
            break
        time.sleep(1)
    if not flag:
        # Raise instead of assert: assert statements are stripped under `python -O`,
        # which would silently swallow a failed report.
        raise RuntimeError("Report final result to manager failed! Please check whether "
                           "the manager's address or status is ok!")
88
def main():
    """Run one auto-search trial: load data, train, save, evaluate, report."""
    args = parse_arg()
    # Load (and optionally subsample) the MNIST training data.
    (x_train, x_test), (y_train, y_test) = load_data(args.data_sampling_scale)
    # Build the network with the trial's learning rate, then train it.
    model = Model(args.lr)
    model.fit(x_train, y_train, epochs=args.epoch, batch_size=args.batch_size)
    # Persist the trained model into this trial's output directory.
    model.save(args.output_dir)
    # Measure test-set accuracy and report it back to the search manager.
    acc = evaluate(model, x_test, y_test)
    report_final(args, metric=acc)
105
# Script entry point: run a single auto-search training trial.
if __name__ == '__main__':
    main()
示例代码对应的yaml配置如下,请保持格式一致
pwo_search_demo.yml示例内容
Plain Text
1#搜索算法参数
2search_strategy:
3 algo: PARTICLE_SEARCH #搜索策略:粒子群算法
4 params:
5 population_num: 8 #种群个体数量 | [1,10] int类型
6 round: 10 #迭代轮数 |[5,50] int类型
7 inertia_weight: 0.5 # 惯性权重 |(0,1] float类型
8 global_acceleration: 1.5 #全局加速度 |(0,4] float类型
9 local_acceleration: 1.5 #个体加速度 |(0,4] float类型
10
11#单次训练时数据的采样比例,单位%
12data_sampling_scale: 100 #|(0,100] int类型
13
14#评价指标参数
15metrics:
16 name: acc #评价指标 | 任意字符串 str类型
17 goal: MAXIMIZE #最大值/最小值 | str类型 MAXIMIZE or MINIMIZE 必须为这两个之一(仅支持大写)
18 expected_value: 100 #早停标准值,评价指标超过该值则结束整个超参搜索,单位% |无限制 int类型
19
20#搜索参数空间
21search_space:
22 batch_size:
23 htype: choice
24 value: [100, 200, 300, 400, 500, 600]
25 lr:
26 htype: loguniform
27 value: [0.0001, 0.9]
28 epoch:
29 htype: choice
30 value: [5, 10, 12]