Sklearn 0.23.2
更新时间:2023-01-18
Sklearn
sklearn框架下,自定义作业支持将模型保存为pickle和joblib格式并发布,在发布至模型仓库时需要选择相应的模型文件。使用下面的代码进行模型训练时,训练程序可以自行加载数据,训练数据选择空文件夹即可。
pickle格式模型示例代码:
Python
1# -*- coding:utf-8 -*-
2""" sklearn train demo """
3import numpy as np
4from sklearn import datasets
5from sklearn.model_selection import train_test_split
6from sklearn.linear_model import LogisticRegression
def load_data():
    """Load the iris dataset and split it into training and test sets.

    The dataset holds 150 samples; 80% (120) go to training and 20% (30)
    to testing. Each sample is an ndarray row of four features, for
    example ``[6.4, 3.1, 5.5, 1.8]``.

    Returns:
        A tuple ``(x_train, x_test, y_train, y_test)``.
    """
    iris = datasets.load_iris()
    # 80/20 train/test split; the fixed random_state makes it repeatable.
    features_train, features_test, labels_train, labels_test = train_test_split(
        iris.data,
        iris.target,
        test_size=0.2,
        random_state=0,
    )
    return features_train, features_test, labels_train, labels_test
def save_model(model):
    """Serialize *model* to ``output/clf.pickle`` in pickle format.

    The ``output`` directory is created when it does not exist yet, so the
    script also works when started from a clean working directory.

    Args:
        model: Any picklable object; here, the fitted estimator.
    """
    import os
    import pickle

    # open(..., 'wb') does not create missing parent directories.
    os.makedirs('output', exist_ok=True)
    with open('output/clf.pickle', 'wb') as f:
        pickle.dump(model, f)
def save_model_joblib(model):
    """Serialize *model* to ``output/clf.pkl`` in joblib format.

    The standalone ``joblib`` package is preferred; the copy under
    ``sklearn.externals`` is only tried when that import fails. The
    ``output`` directory is created when it does not exist yet.

    Args:
        model: The object to persist; here, the fitted estimator.

    Raises:
        ImportError: If joblib is available from neither location.
    """
    import os

    try:
        import joblib
    except ImportError:
        # Fallback for older scikit-learn releases that still ship
        # sklearn.externals.joblib. Only ImportError is caught so that
        # unrelated failures are not silently hidden.
        from sklearn.externals import joblib

    os.makedirs('output', exist_ok=True)
    joblib.dump(model, 'output/clf.pkl')
def main():
    """Train a logistic-regression classifier on iris and save it as pickle.

    Prints the fitted coefficients and intercepts, the accuracy on the test
    split, and the mean squared difference between predicted and true labels.
    """
    # Training features, test features, training labels, test labels.
    x_train, x_test, y_train, y_test = load_data()

    # Use L2 regularization.
    model = LogisticRegression(penalty='l2')
    model.fit(x_train, y_train)
    save_model(model)

    print("w: %s" % model.coef_)
    print("b: %s" % model.intercept_)
    # score() on a classifier is the mean accuracy on the given data,
    # so the value is labelled "accuracy" rather than "precision".
    print("accuracy: %s" % model.score(x_test, y_test))
    print("MSE: %s" % np.mean((model.predict(x_test) - y_test) ** 2))
# Run the training demo only when executed as a script, not on import.
if __name__ == "__main__":
    main()
joblib格式示例代码:
Python
1# -*- coding:utf-8 -*-
2import numpy as np
3
4from sklearn import datasets
5from sklearn.model_selection import train_test_split
6from sklearn.linear_model import LogisticRegression
7
def load_data():
    """Return an 80/20 train/test split of the iris dataset.

    The dataset holds 150 samples, giving 120 for training and 30 for
    testing. Each sample is an ndarray row of four features, for example
    ``[6.4, 3.1, 5.5, 1.8]``.

    Returns:
        A tuple ``(x_train, x_test, y_train, y_test)``.
    """
    dataset = datasets.load_iris()
    # Fixed random_state so every run produces the same split.
    split = train_test_split(
        dataset.data, dataset.target, test_size=0.2, random_state=0
    )
    x_train, x_test, y_train, y_test = split
    return x_train, x_test, y_train, y_test
16
17
def save_model(model):
    """Serialize *model* to ``output/clf.pickle`` in pickle format.

    The ``output`` directory is created when it does not exist yet, so the
    script also works when started from a clean working directory.

    Args:
        model: Any picklable object; here, the fitted estimator.
    """
    import os
    import pickle

    # open(..., 'wb') does not create missing parent directories.
    os.makedirs('output', exist_ok=True)
    with open('output/clf.pickle', 'wb') as f:
        pickle.dump(model, f)
22
23
def save_model_joblib(model):
    """Serialize *model* to ``output/clf.pkl`` in joblib format.

    The standalone ``joblib`` package is imported first because
    ``sklearn.externals.joblib`` is no longer shipped with recent
    scikit-learn releases; the legacy location is kept only as a fallback
    for old installations. The ``output`` directory is created when it
    does not exist yet.

    Args:
        model: The object to persist; here, the fitted estimator.

    Raises:
        ImportError: If joblib is available from neither location.
    """
    import os

    try:
        import joblib
    except ImportError:
        # Legacy path for old scikit-learn versions that bundle joblib.
        from sklearn.externals import joblib

    os.makedirs('output', exist_ok=True)
    joblib.dump(model, 'output/clf.pkl')
27
28
def main():
    """Train a logistic-regression classifier on iris and save it via joblib.

    Prints the fitted coefficients and intercepts, the accuracy on the test
    split, and the mean squared difference between predicted and true labels.
    """
    # Training features, test features, training labels, test labels.
    x_train, x_test, y_train, y_test = load_data()

    # Use L2 regularization.
    model = LogisticRegression(penalty='l2')
    model.fit(x_train, y_train)

    save_model_joblib(model)

    print("w: %s" % model.coef_)
    print("b: %s" % model.intercept_)
    # score() on a classifier is the mean accuracy on the given data,
    # so the value is labelled "accuracy" rather than "precision".
    print("accuracy: %s" % model.score(x_test, y_test))
    print("MSE: %s" % np.mean((model.predict(x_test) - y_test) ** 2))
42
# Run the training demo only when executed as a script, not on import.
if __name__ == "__main__":
    main()