简介:本文面向TensorFlow初学者,以MNIST手写数字数据集为例,系统讲解如何构建回归模型完成数字识别任务。通过代码示例与理论结合,详细阐述数据预处理、模型搭建、训练优化及预测评估全流程。
MNIST数据集包含60,000张训练图像和10,000张测试图像,每张图像为28×28像素的灰度手写数字(0-9)。传统分类任务通过softmax输出10个类别的概率,而回归任务则尝试直接预测数字的连续值(如将“3”映射为3.0)。这种设定虽不常见于实际场景,但作为入门案例能有效帮助理解TensorFlow的核心机制。
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import matplotlib.pyplot as plt

# Report the installed TensorFlow version (the 2.x series is recommended).
print(f"TensorFlow版本: {tf.__version__}")
# Fetch the MNIST train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()


def preprocess_data(x, y):
    """Flatten and scale the images and cast the labels to float32.

    Args:
        x: Image array; reshaped to rows of 28*28 values, cast to float32
            and divided by 255.0.
        y: Label array; cast to float32 so it can serve as a regression
            target.

    Returns:
        A tuple ``(x, y)`` holding the processed images and labels.
    """
    flattened = x.reshape(-1, 28 * 28)
    scaled = flattened.astype('float32') / 255.0
    targets = y.astype('float32')
    return scaled, targets


x_train, y_train = preprocess_data(x_train, y_train)
x_test, y_test = preprocess_data(x_test, y_test)
技术要点:
reshape(-1, 28*28)将二维图像转为一维向量
def build_regression_model():
    """Assemble the dense regression network for flattened 784-value inputs.

    Returns:
        An uncompiled ``Sequential`` model: Dense(128, relu) -> Dropout(0.2)
        -> Dense(64, relu) -> Dense(1).
    """
    net = models.Sequential()
    net.add(layers.Dense(128, activation='relu', input_shape=(784,)))
    net.add(layers.Dropout(0.2))  # guards against overfitting
    net.add(layers.Dense(64, activation='relu'))
    # No activation on the output layer: the raw value is the regression output.
    net.add(layers.Dense(1))
    return net


model = build_regression_model()
model.compile(
    optimizer='adam',
    loss='mse',       # mean squared error loss
    metrics=['mae'],  # mean absolute error
)
# Train for 20 epochs with mini-batches of 32 samples, holding out 20% of the
# training data for validation; the returned object records per-epoch metrics.
history = model.fit(
    x_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
)
def plot_training_history(history):
    """Plot training and validation curves for the loss and the MAE metric.

    Args:
        history: Object whose ``history`` mapping holds the ``loss``,
            ``val_loss``, ``mae`` and ``val_mae`` series.
    """
    # One row per panel:
    # (train key, validation key, train label, validation label, title, y label)
    panels = (
        ('loss', 'val_loss', 'Train Loss', 'Validation Loss',
         'Loss Evolution', 'MSE'),
        ('mae', 'val_mae', 'Train MAE', 'Validation MAE',
         'MAE Evolution', 'MAE'),
    )

    plt.figure(figsize=(12, 4))
    for position, panel in enumerate(panels, start=1):
        train_key, val_key, train_label, val_label, title, y_label = panel
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key], label=train_label)
        plt.plot(history.history[val_key], label=val_label)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()
    plt.tight_layout()
    plt.show()


plot_training_history(history)
ReduceLROnPlateau回调
# Multiply the learning rate by 0.5 once val_loss has not improved for 3 epochs.
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
)

# Stop training once val_loss has not improved for 10 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
)
# Score the model on the test split; the two values unpacked here are the
# loss (MSE) and the MAE metric the model was compiled with.
test_loss, test_mae = model.evaluate(
    x_test,
    y_test,
    verbose=0,
)
print(f"Test MSE: {test_loss:.4f}, Test MAE: {test_mae:.4f}")
def predict_and_visualize(model, x_test, y_test, num_samples=5):
    """Show the first test digits with their true and predicted values.

    Args:
        model: Object exposing ``predict``; it is called on the selected rows
            of ``x_test`` and its output is flattened to one value per sample.
        x_test: Flattened images; each row is reshaped to 28x28 for display.
        y_test: True values, indexed in step with ``x_test``.
        num_samples: Maximum number of samples to display. Clamped to the
            number of available samples, so oversized requests no longer
            raise ``IndexError``.

    Returns:
        None. Nothing is drawn when there are no samples to show.
    """
    # Never request more panels than there are images or labels.
    count = min(num_samples, len(x_test), len(y_test))
    if count <= 0:
        return  # nothing to predict or draw

    predictions = model.predict(x_test[:count]).flatten()

    plt.figure(figsize=(15, 3))
    # zip() stops at the shortest input, i.e. after `count` predictions.
    samples = zip(x_test, y_test, predictions)
    for position, (image, label, predicted) in enumerate(samples, start=1):
        plt.subplot(1, count, position)
        plt.imshow(image.reshape(28, 28), cmap='gray')
        plt.title(f"True: {label}\nPred: {predicted:.1f}")
        plt.axis('off')
    plt.tight_layout()
    plt.show()


predict_and_visualize(model, x_test, y_test)
def analyze_prediction_errors(model, x_test, y_test):
    """Scatter-plot the prediction error against the true value.

    Args:
        model: Object exposing ``predict``; its output on ``x_test`` is
            flattened before the true values are subtracted.
        x_test: Inputs passed to ``model.predict``.
        y_test: True values; used as the x-axis and subtracted from the
            predictions to obtain the error.
    """
    predicted = model.predict(x_test).flatten()
    residuals = predicted - y_test  # positive means the model over-predicted

    plt.figure(figsize=(10, 5))
    plt.scatter(y_test, residuals, alpha=0.5)
    # Dashed red reference line at zero error.
    plt.axhline(y=0, color='r', linestyle='--')
    plt.title('Prediction Errors by True Value')
    plt.xlabel('True Value')
    plt.ylabel('Prediction Error')
    plt.show()


analyze_prediction_errors(model, x_test, y_test)
def build_cnn_regression_model():
    """Assemble a small convolutional regression network.

    The model accepts flattened 784-value inputs and reshapes them to
    28x28x1 before two Conv2D/MaxPooling2D stages, a Dense(64, relu) layer
    and a single-unit output layer with no activation.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    cnn = models.Sequential()
    # Restore the image layout from the flat input vector.
    cnn.add(layers.Reshape((28, 28, 1), input_shape=(784,)))
    cnn.add(layers.Conv2D(32, (3, 3), activation='relu'))
    cnn.add(layers.MaxPooling2D((2, 2)))
    cnn.add(layers.Conv2D(64, (3, 3), activation='relu'))
    cnn.add(layers.MaxPooling2D((2, 2)))
    cnn.add(layers.Flatten())
    cnn.add(layers.Dense(64, activation='relu'))
    cnn.add(layers.Dense(1))
    return cnn
def huber_loss(y_true, y_pred, delta=1.0):
    """Element-wise Huber loss: quadratic for small errors, linear otherwise.

    Args:
        y_true: Target values.
        y_pred: Predicted values.
        delta: Absolute-error threshold below which the quadratic branch
            is used.

    Returns:
        A tensor with ``error**2 / 2`` where ``|error| < delta`` and
        ``delta * (|error| - delta / 2)`` elsewhere.
    """
    error = y_true - y_pred
    abs_error = tf.abs(error)  # computed once, shared by both branches
    squared_loss = tf.square(error) / 2
    linear_loss = delta * (abs_error - delta / 2)
    return tf.where(abs_error < delta, squared_loss, linear_loss)


# Use the custom loss. The MAE metric is kept so that evaluate() still
# returns a (loss, mae) pair and the training history still contains the
# 'mae' / 'val_mae' series that the evaluation and plotting code reads.
model.compile(optimizer='adam', loss=huber_loss, metrics=['mae'])
# Complete end-to-end training example
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import matplotlib.pyplot as plt

# 1. Data loading and preprocessing
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()


def preprocess(x, y):
    """Flatten and scale the images and cast the labels to float32.

    Args:
        x: Image array; reshaped to rows of 28*28 values, cast to float32
            and divided by 255.
        y: Label array; cast to float32.

    Returns:
        A tuple ``(x, y)`` holding the processed images and labels.
    """
    flattened = x.reshape(-1, 28 * 28)
    scaled = flattened.astype('float32') / 255
    targets = y.astype('float32')
    return scaled, targets


x_train, y_train = preprocess(x_train, y_train)
x_test, y_test = preprocess(x_test, y_test)

# 2. Model construction
model = models.Sequential()
model.add(layers.Dense(128, activation='relu', input_shape=(784,)))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))  # single output value, no activation
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae'],
)

# 3. Training configuration
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=10),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=3),
]

# 4. Model training
history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1,
)

# 5. Evaluation and visualization
test_loss, test_mae = model.evaluate(x_test, y_test)
print(f"\nTest MSE: {test_loss:.4f}, Test MAE: {test_mae:.4f}")
# The visualization helpers are the same as the ones shown earlier...
实践要点:
常见问题解决:
扩展学习路径:
通过本文的完整流程,读者可以系统掌握使用TensorFlow构建MNIST回归模型的全过程,为后续深入学习计算机视觉和深度学习打下坚实基础。