こちらを参考に自信で準備したイメージを基に,
2つのクラスを持つ学習モデルの実装を試みましたが, ValueErrorが出てしまいます.
実装環境
・Anaconda 4.3.14
・Python 3.5.3
・tensorflow 1.2.0rc2
・keras 2.0.4
・numpy 1.12.1
predict_generator() に問題があるのだと思いますが, 調べても解決できない状況にあり, 投稿させていただきました.
以下に詳細を記します.
実装したコード
# -*- coding: utf-8 -*-
from __future__ import print_function
import os
import sys
import h5py
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras.applications import VGG16
img_w = 150
img_h = 150
top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
# classes = ['landscape', 'portrait']
# classes = ['landscape', 'portraits']
# nb_classes = len(classes)
nb_train_samples = 180
nb_validation_samples = 20
# nb_samples_per_class = 90
epochs = 10
batch_size = 16
# VGG16 modelのFC層直前の特徴量を保存する
def save_bottleneck_features():
datagen = ImageDataGenerator(
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
fill_mode='nearest')
# VGG16ネットワークを作成
model = VGG16(include_top=False, weights='imagenet')
generator = datagen.flow_from_directory(
train_data_dir,
target_size=(img_w, img_h),
batch_size=batch_size,
shuffle=False,
class_mode=None)
# classes=classes)
bottleneck_features_train = model.predict_generator(
generator, nb_train_samples // batch_size)
np.save(open('bottleneck_features_train.npy', 'wb'),
bottleneck_features_train)
generator = datagen.flow_from_directory(
validation_data_dir,
target_size=(img_w, img_h),
batch_size=batch_size,
shuffle=False,
class_mode=None)
# classes=classes)
bottleneck_features_validation = model.predict_generator(
generator, nb_validation_samples // batch_size)
np.save(open('bottleneck_features_validation.npy', 'wb'),
bottleneck_features_validation)
# save_bottleneck_featuresで保存した特徴量を基にFC層を作成して学習させる
def train_top_model():
train_data = np.load(open('bottleneck_features_train.npy', 'rb'))
train_labels = np.array([0] * int(nb_train_samples / 2) + [1] * int(nb_train_samples / 2))
validation_data = np.load(open('bottleneck_features_validation.npy', 'rb'))
validation_labels = np.array([0] * int(nb_validation_samples / 2) + [1] * int(nb_validation_samples / 2))
# (180, 4, 4, 512) 期待する値
print(train_data.shape)
# (20, 4, 4, 512) 期待する値
print(validation_data.shape)
# FCネットワークを作成
top_model = Sequential()
top_model.add(Flatten(input_shape=train_data.shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(1, activation='sigmoid'))
top_model.compile(
optimizer='rmsprop',
loss='binary_crossentropy',
metrics=['accuracy'])
# visualize model
top_model.summary()
from keras.utils import plot_model
plot_model(top_model, to_file="model.png", show_shapes=True)
# 学習
top_model.fit(
train_data,
train_labels,
epochs=epochs,
batch_size=batch_size,
validation_data=(validation_data, validation_labels))
# validation_split=0.05)
# 学習した重みを保存
top_model.save_weights(top_model_weights_path)
save_bottleneck_features()
train_top_model()
以下がエラーの詳細になります.
Found 180 images belonging to 2 classes.
Found 20 images belonging to 2 classes.
(176, 4, 4, 512) #print(train_data.shape)の値
(16, 4, 4, 512) #print(validation_data.shape)の値
Traceback (most recent call last):
File "train.py", line 115, in <module>
train_top_model()
File "train.py", line 107, in train_top_model
validation_data=(validation_data, validation_labels))
File "C:\Users\username\Anaconda3\envs\py35\lib\site-packages\keras\models.py", line 856, in fit
initial_epoch=initial_epoch)
File "C:\Users\username\Anaconda3\envs\py35\lib\site-packages\keras\engine\training.py", line 1429, in fit
batch_size=batch_size)
File "C:\Users\username\Anaconda3\envs\py35\lib\site-packages\keras\engine\training.py", line 1317, in _standardize_user_data
_check_array_lengths(x, y, sample_weights)
File "C:\Users\username\Anaconda3\envs\py35\lib\site-packages\keras\engine\training.py", line 235, in _check_array_lengths
'and ' + str(list(set_y)[0]) + ' target samples.')
ValueError: Input arrays should have the same number of samples as target arrays. Found 176 input samples and 180 target samples.