Keras 模型

  • Sequential model
  • Model class used with the function API

1.Keras 模型共有的方法和属性

from keras.models import Model
from keras.models import model_from_json, model_from_yaml
  • model.layers
  • model.inputs
  • model.outputs
  • model.summary()
  • Config
    • model.get_config()
      • Model.from_config()
      • Sequential.from_config()
  • Weights
    • model.get_weights()
      • to Numpy arrays
    • model.set_weights(weights)
      • from Numpy arrays
    • model.save_weights(filepath)
      • to HDF5 file
    • model.load_weights(filepath, by_name = False)
      • from HDF5 file
  • Save or Load
    • model.to_json()
      • model_from_json()
    • model.to_yaml()
      • model_from_yaml()

2.Model subclassing

  • 构建 full-customizable model by subclassing the Model class
  • 实现 forward pass in the call method
  • 模型的 layers 定义在 __init__(self, ...)
  • 模型的前向传播定义在 call(self, inputs)
  • 可以通过调用 self.add_loss(loss_tensor) 来指定自定义损失函数
  • 在 subclassing 模型中,模型的拓扑结构被定义为 Python 代码,而不是 layers 的静态图,因此无法检查或序列化模型的拓扑结构,即以下方法不适用于 subclassing 模型:
    • model.inputs
    • model.outputs
    • model.to_yaml()
    • model.to_json()
    • model.get_config()
  • 模型(keras.models.Model)子类的 API 为实现更加复杂的模型提供了灵活性,但这是有代价的:除了以上功能不能使用之外,模型也更复杂、更容易出错


import keras

class SimpleMLP(keras.Model):
    """Small MLP classifier built by subclassing ``keras.Model``.

    A ``Dense(32, relu)`` hidden layer is optionally followed by dropout
    and/or batch normalization, then by a softmax output layer.

    Args:
        use_bn: If true, apply ``BatchNormalization`` after the hidden layer.
        use_dp: If true, apply ``Dropout(0.5)`` after the hidden layer.
        num_classes: Number of units in the softmax output layer.
    """

    def __init__(self, use_bn = False, use_dp = False, num_classes = 10):
        super(SimpleMLP, self).__init__(name = "mlp")
        self.use_bn = use_bn
        self.use_dp = use_dp
        self.num_classes = num_classes

        # Layers are created here and wired together in call().
        self.dense1 = keras.layers.Dense(32, activation = "relu")
        self.dense2 = keras.layers.Dense(num_classes, activation = "softmax")
        # Optional layers only exist when the matching flag is set, so call()
        # must guard on the same flags before touching them.
        if self.use_dp:
            self.dp = keras.layers.Dropout(0.5)
        if self.use_bn:
            self.bn = keras.layers.BatchNormalization(axis = -1)

    def call(self, inputs):
        """Run the forward pass on ``inputs`` and return the softmax output."""
        x = self.dense1(inputs)
        if self.use_dp:
            x = self.dp(x)
        if self.use_bn:
            x = self.bn(x)

        return self.dense2(x)

model = SimpleMLP()

3.Keras Sequential 模型的使用文档

Sequential 模型是层(layers)的线性堆叠

3.1 Keras Sequential Hello World

# in Python
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation

# ==========
# Build the model
# ==========
model = Sequential()
model.add(Dense(units = 64, activation = "relu", input_dim = 784))
model.add(Dense(units = 64, activation = "relu"))
model.add(Dense(units = 10, activation = "softmax"))

# Alternative: pass the layers as a list to the constructor.
# model = Sequential([
#     Dense(64, input_shape = (784,)),
#     Activation("relu"),
#     Dense(64),
#     Activation("relu"),
#     Dense(10),
#     Activation("softmax")
# ])

# ==========
# Compile the model
# ==========
# Alternative: use string shortcuts for the loss and the optimizer.
# model.compile(loss = "categorical_crossentropy",
#               optimizer = "sgd",
#               metrics = ["accuracy"])

# `metrics` takes a list; the "accuracy" shortcut lets Keras pick the
# accuracy variant that matches the categorical cross-entropy loss.
model.compile(loss = keras.losses.categorical_crossentropy,
              optimizer = keras.optimizers.SGD(lr = 0.01, momentum = 0.9, nesterov = True),
              metrics = ["accuracy"])

# ==========
# Train the model
# ==========
# NOTE: x_train / y_train (and x_batch / y_batch, x_test / y_test below) are
# not defined in this snippet; they stand for the user's own data arrays.
model.fit(x_train, y_train, epochs = 5, batch_size = 32)
# model.train_on_batch(x_batch, y_batch)

# ==========
# Evaluate the model
# ==========
loss_and_metrics = model.evaluate(x_test, y_test, batch_size = 128)

# ==========
# Predict with the model
# ==========
classes = model.predict(x_test, batch_size = 128)

4.Keras 函数式API 的使用文档

  • Keras 函数式 API 是定义复杂模型的方法
  • Keras 函数式 API 可以重用经过训练的模型,可以通过在张量上调用任何模型并将其视为一个层(layers)
    • 调用模型的结构
    • 调用模型的权重

4.1 Keras 函数式API Hello World

A densely-connected network:

from keras.layers import Input, Dense
from keras.models import Model

# ==========
# 模型构建
# ==========
inputs = Input(shape = (784,))
x = Dense(64, activation = "relu")(inputs)
x = Dense(64, activation = "relu")(x)
predictions = Dense(10, activation = "softmax")(x)
model = Model(inputs = inputs, outputs = predictions)

# ==========
# 模型编译
# ==========
model.compile(optimizer = "rmsprop",
              loss = "categorical_crossentropy",
              metrics = ["accuracy"])

# ==========
# Train the model
# ==========
# NOTE: `data` and `labels` are not defined in this snippet; they stand for
# the user's own training inputs and targets.
model.fit(data, labels)

4.2 函数式 API 特点

  • 所有模型都像层(layer)一样可以调用
  • 多输入和多输出模型
  • 共享图层
  • “层节点”概念


# 将图像分类模型转换为视频分类模型
from keras.layers import TimeDistributed
from keras.layers import Input, Dense
from keras.models import Model

inputs = Input(shape = (784,))
x = Dense(64, activation = "relu")(inputs)
x = Dense(64, activation = "relu")(x)
predictions = Dense(10, activation = "softmax")(x)
model = Model(inputs = inputs, outputs = predictions)

input_sequences = Input(shape = (20, 784))
processed_sequences = TimeDistributed(model)(input_sequences)


from keras.layers import Input, Embedding, LSTM, Dense
from keras.models import Model

# ==========
# 模型构建
# ==========
# 标题输入
main_input = Input(shape = (100,), dtype = "int32", name = "main_input")
x = Embedding(output_dim = 512, input_dim = 10000, input_length = 100)(main_input)
lstm_out = LSTM(32)(x)
auxiliary_output = Dense(1, activation = "sigmoid", name = "aux_output")(lstm_out)

# 标题发布时间等数据输入
auxiliary_input = Input(shape = (5,), name = "aux_input")

# concatenate the lstm_out and auxiliary_input
x = keras.layers.concatenate([lstm_out, auxiliary_input])
x = Dense(64, activation = "relu")(x)
x = Dense(64, activation = "relu")(x)
x = Dense(64, activation = "relu")(x)
main_output = Dense(1, activation = "sigmoid", name = "main_output")(x)
model = Model(inputs = [main_input, auxiliary_input],
              outputs = [main_output, auxiliary_output])

# ==========
# 模型编译
# ==========
# Losses and loss weights are keyed by the output layer names given above
# ("main_output" and "aux_output").
model.compile(optimizer = "rmsprop",
              loss = {
                "main_output": "binary_crossentropy",
                "aux_output": "binary_crossentropy"
              },
              loss_weights = {
                "main_output": 1,
                "aux_output": 0.2
              })

# ==========
# Train the model
# ==========
# Inputs and targets are passed as dicts keyed by the Input / output layer
# names. NOTE: headline_data, additional_data and labels are not defined in
# this snippet; they stand for the user's own arrays.
model.fit(
    {
        "main_input": headline_data,
        "aux_input": additional_data
    },
    {
        "main_output": labels,
        "aux_output": labels
    },
    epochs = 50,
    batch_size = 32
)



import keras
from keras.layers import Input, LSTM, Dense
from keras.models import Model

# ==========
# 数据处理
# ==========

# ==========
# 模型构建
# ==========
# input layers
tweet_a = Input(shape = (280, 256))
tweet_b = Input(shape = (280, 256))
# LSTM layers
shared_lstm = LSTM(64)
encoded_a = shared_lstm(tweet_a)
encoded_b = shared_lstm(tweet_b)
# concatenate two vectors
merged_vector = keras.layers.concatenate([encoded_a, encoded_b], axis = -1)
# output layers(add a logistic regression on top)
predictions = Dense(1, activation = "sigmoid")(merged_vector)
# Use the `outputs` keyword, matching `inputs` and the other Model(...)
# calls in this file.
model = Model(inputs = [tweet_a, tweet_b],
              outputs = predictions)

# ==========
# Compile the model
# ==========
model.compile(optimizer = "rmsprop",
              loss = "binary_crossentropy",
              metrics = ["accuracy"])

# ==========
# Train the model
# ==========
# NOTE: data_a, data_b and labels are not defined in this snippet; they stand
# for the two tweet input arrays and the binary targets.
model.fit([data_a, data_b], labels, epochs = 10)



from keras.layers import Input, LSTM

a = Input(shape = (280, 256))

lstm = LSTM(32)
encoded_a = lstm(a)

# assert lstm.output == encoded_a

output = lstm.output
output_shape = lstm.output_shape


from keras.layers import Input, LSTM
a = Input(shape = (280, 256))
b = Input(shape = (280, 256))

lstm = LSTM(32)

encoded_a = lstm(a)
encoded_b = lstm(b)

# lstm.output
# lstm.output_shape
# assert lstm.get_output_at(0) == encoded_a
# assert lstm.get_output_at(1) == encoded_b

output0 = lstm.get_output_at(0)
output1 = lstm.get_output_at(1)
output0_shape = lstm.get_output_shape_at(0)
output1_shape = lstm.get_output_shape_at(1)

4.3 函数式 API 模型例子

Inception module


from keras.layers import Input, Conv2D, MaxPooling2D
from keras.models import Model

input_img = Input(shape = (256, 256, 3))
tower_1 = Conv2D(64, (1, 1), padding = "same", activation = "relu")(input_img)
tower_1 = Conv2D(64, (3, 3), padding = "same", activation = "relu")(tower_1)

tower_2 = Conv2D(64, (1, 1), padding = "same", activation = "relu")(input_img)
tower_2 = Conv2D(64, (5, 5), padding = "same", activation = "relu")(tower_2)

tower_3 = MaxPooling2D((3, 3), strides = (1, 1), padding = "same")(input_img)
tower_3 = Conv2D(64, (1, 1), padding = "same", activation = "relu")(tower_3)

# Stack the three towers along the last axis, which is the channel axis
# assuming the default channels_last data format used by Input(shape =
# (256, 256, 3)) above.
output = keras.layers.concatenate([tower_1, tower_2, tower_3], axis = -1)

# Wrap the graph from the image input to the concatenated towers in a Model.
model = Model(inputs = input_img,
              outputs = output)


Residual connection on a convolution layer


from keras.layers import Input, Conv2D
from keras.models import Model

x = Input(shape = (256, 256, 3))
# 3 filters with "same" padding, so y can be added element-wise to x.
y = Conv2D(3, (3, 3), padding = "same")(x)
# Residual connection: add the convolution output back onto its input.
output = keras.layers.add([x, y])

model = Model(inputs = x,
              outputs = output)


Shared vision model Visual question answering model Video question answering model


  • 回调函数是一个函数的集合,会在训练的阶段使用
  • 可以使用回调函数查看训练模型的内在状态和统计。也可以传递一个回调函数的列表(作为 callbacks 关键字参数)到 Sequential 或 Model 类型的 .fit() 方法。在训练时,相应的回调函数的方法会在各自的阶段被调用


  • keras.callbacks.Callback()
    • 用来创建新的回调函数的抽象基类
    • .params
    • .model
  • keras.callbacks.BaseLogger(stateful_metrics = None)
    • 累积训练 epoch 评估值的均值
  • keras.callbacks.TerminateOnNaN()
    • 当遇到损失为 NaN 停止训练
  • keras.callbacks.ProgbarLogger()
  • keras.callbacks.History()
    • 所有事件都记录到 History 对象
  • keras.callbacks.ModelCheckpoint()
    • 在每个训练期之后保存模型
  • keras.callbacks.EarlyStopping()
  • keras.callbacks.RemoteMonitor()
  • keras.callbacks.LearningRateScheduler(schedule, verbose = 0)
  • keras.callbacks.TensorBoard()
  • keras.callbacks.ReduceLROnPlateau()
  • keras.callbacks.CSVLogger()
  • keras.callbacks.LambdaCallback()


from keras.layers import Dense, Activation
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint

# Build the model
model = Sequential()
model.add(Dense(10, input_dim = 784, kernel_initializer = "uniform"))
# categorical_crossentropy expects class probabilities, so finish with softmax.
model.add(Activation("softmax"))

# Compile the model
model.compile(loss = "categorical_crossentropy",
              optimizer = "rmsprop")

# 模型训练
# 在训练时,保存批量损失值
class LossHistory(keras.callbacks.Callback):
    """Callback that records the loss reported at the end of every batch.

    After training, ``self.losses`` holds one entry per batch, in order.
    """

    def on_train_begin(self, logs = None):
        """Reset the recorded losses at the start of training."""
        self.losses = []

    def on_batch_end(self, batch, logs = None):
        """Append the ``"loss"`` value from ``logs`` (``None`` if absent)."""
        # Default to None instead of a shared mutable dict.
        logs = logs or {}
        self.losses.append(logs.get("loss"))
history = LossHistory()

# Save the model after an epoch only when the monitored validation loss
# improved (save_best_only = True).
checkpointer = ModelCheckpoint(filepath = "/tmp/weight.hdf5",
                               verbose = 1,
                               save_best_only = True)

# NOTE: x_train / y_train / x_test / y_test are not defined in this snippet;
# they stand for the user's own training and validation arrays.
model.fit(x_train, y_train,
          batch_size = 128, epochs = 20,
          verbose = 0,
          validation_data = (x_test, y_test),
          callbacks = [history, checkpointer])

# 模型结果输出


Keras Applications(keras.applications) 提供了预训练好的深度学习模型,这些模型可以用于预测、特征提取等.



在 ImageNet 数据上预训练过的用于图像分类的模型
  • Xception
  • VGG16
  • VGG19
  • ResNet, ResNetV2, ResNeXt
  • InceptionV3
  • InceptionResNetV2
  • MobileNet
  • MobileNetV2
  • DenseNet
  • NASNet
from keras.applications.xception import Xception
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.mobilenet import MobileNet
from keras.applications.densenet import DenseNet121
from keras.applications.densenet import DenseNet169
from keras.applications.densenet import DenseNet201
from keras.applications.nasnet import NASNetLarge
from keras.applications.nasnet import NASNetMobile
from keras.applications.mobilenet_v2 import MobileNetV2

# channels_last only; 299x299
xception_model = Xception(include_top = True,
                          weights = "imagenet",
                          input_tensor = None,
                          input_shape = None,
                          pooling = None,
                          classes = 1000)
# channels_first and channels_last; 224x224
vgg16_model = VGG16(include_top = True,
                    weights = "imagenet",
                    input_tensor = None,
                    input_shape = None,
                    pooling = None,
                    classes = 1000)
vgg19_model = VGG19(include_top = True,
                    weights = 'imagenet',
                    input_tensor = None,
                    input_shape = None,
                    pooling = None,
                    classes = 1000)
resnet50_model = ResNet50(include_top = True,
                          weights = 'imagenet',
                          input_tensor = None,
                          input_shape = None,
                          pooling = None,
                          classes = 1000)
inception_v3_model = InceptionV3(include_top = True,
                                 weights = 'imagenet',
                                 input_tensor = None,
                                 input_shape = None,
                                 pooling = None,
                                 classes = 1000)
inception_resnet_v2_model = InceptionResNetV2(include_top = True,
                                              weights = 'imagenet',
                                              input_tensor = None,
                                              input_shape = None,
                                              pooling = None,
                                              classes = 1000)
mobilenet_model = MobileNet(input_shape = None,
                            alpha = 1.0,
                            depth_multiplier = 1,
                            dropout = 1e-3,
                            include_top = True,
                            weights = 'imagenet',
                            input_tensor = None,
                            pooling = None,
                            classes = 1000)
densenet_model = DenseNet121(include_top = True,
                             weights = 'imagenet',
                             input_tensor = None,
                             input_shape = None,
                             pooling = None,
                             classes = 1000)
densenet_model = DenseNet169(include_top = True,
                             weights = 'imagenet',
                             input_tensor = None,
                             input_shape = None,
                             pooling = None,
                             classes = 1000)
densenet_model = DenseNet201(include_top = True,
                             weights = 'imagenet',
                             input_tensor = None,
                             input_shape = None,
                             pooling = None,
                             classes = 1000)
nasnet_model = NASNetLarge(input_shape = None,
                           include_top = True,
                           weights = 'imagenet',
                           input_tensor = None,
                           pooling = None,
                           classes = 1000)
nasnet_model = NASNetMobile(input_shape = None,
                            include_top = True,
                            weights = 'imagenet',
                            input_tensor = None,
                            pooling = None,
                            classes = 1000)
mobilenet_v2_model = MobileNetV2(input_shape = None,
                                 alpha = 1.0,
                                 depth_multiplier = 1,
                                 include_top = True,
                                 weights = 'imagenet',
                                 input_tensor = None,
                                 pooling = None,
                                 classes = 1000)


from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np

# Load ResNet50 with its ImageNet-pretrained weights
model = ResNet50(weights = "imagenet")

# Load the image and turn it into a preprocessed batch of one
img_path = "elephant.jpg"
img = image.load_img(img_path, target_size = (224, 224))
x = image.img_to_array(img)
# Add a leading batch axis: the model predicts on batches of images.
x = np.expand_dims(x, axis = 0)
x = preprocess_input(x)

preds = model.predict(x)
# decode_predictions returns one list per image in the batch; print the
# top-3 entries for the single image.
print("Predicted:", decode_predictions(preds, top = 3)[0])