Triplet loss using a pre-trained network

I am trying to fine-tune an EfficientNet network for person re-identification (Re-ID) in Keras, using the triplet-loss technique. This is the code I am using:

This is the generator:

# Assumed imports (tf.keras variant) -- the original snippet does not show them.
from collections import defaultdict
from os.path import join

import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split

import efficientnet.tfkeras as efn
from efficientnet.tfkeras import preprocess_input
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Dropout, Input, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


class SampleGen(object):
    def __init__(self, file_class_mapping):
        self.file_class_mapping = file_class_mapping
        self.class_to_list_files = defaultdict(list)
        self.list_all_files = list(file_class_mapping.keys())
        self.range_all_files = list(range(len(self.list_all_files)))

        for file, class_ in file_class_mapping.items():
            self.class_to_list_files[class_].append(file)

        self.list_classes = list(set(self.file_class_mapping.values()))
        self.range_list_classes = range(len(self.list_classes))
        # Sample classes in proportion to how many images each one has.
        self.class_weight = np.array([len(self.class_to_list_files[class_]) for class_ in self.list_classes])
        self.class_weight = self.class_weight / np.sum(self.class_weight)

    def get_sample(self):
        # Draw two images from one class (anchor + positive) and one image
        # from a different class (negative).
        class_idx = np.random.choice(self.range_list_classes, 1, p=self.class_weight)[0]
        # NB: chosen with replacement, so the two positives can coincide.
        examples_class_idx = np.random.choice(range(len(self.class_to_list_files[self.list_classes[class_idx]])), 2)
        positive_example_1, positive_example_2 = \
            self.class_to_list_files[self.list_classes[class_idx]][examples_class_idx[0]], \
            self.class_to_list_files[self.list_classes[class_idx]][examples_class_idx[1]]

        # Rejection-sample a negative: redraw until its class differs from the anchor's.
        negative_example = None
        while negative_example is None or self.file_class_mapping[negative_example] == \
                self.file_class_mapping[positive_example_1]:
            negative_example_idx = np.random.choice(self.range_all_files, 1)[0]
            negative_example = self.list_all_files[negative_example_idx]
        return positive_example_1, negative_example, positive_example_2
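
# Hypothetical usage sketch (not in the original post; file names are made up):
#
#     mapping = {'img_001.jpg': 'person_a', 'img_002.jpg': 'person_a',
#                'img_003.jpg': 'person_b'}
#     sampler = SampleGen(mapping)
#     anchor, negative, positive = sampler.get_sample()
#
# get_sample() returns (anchor, negative, positive): anchor and positive share
# an Id, while negative is drawn from a different Id.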


def read_and_resize(filepath):
    # Load as RGB and resize to the square network input.
    im = Image.open(filepath).convert('RGB')
    im = im.resize((image_size, image_size))
    return np.array(im, dtype="float32")


def augment(im_array):
    # Only augmentation used: a horizontal flip with probability ~0.1.
    if np.random.uniform(0, 1) > 0.9:
        im_array = np.fliplr(im_array)
    return im_array


def gen(triplet_gen):
    while True:
        list_positive_examples_1 = []
        list_negative_examples = []
        list_positive_examples_2 = []

        for i in range(batch_size):
            positive_example_1, negative_example, positive_example_2 = triplet_gen.get_sample()
            path_pos1 = join(path_train, positive_example_1)
            path_neg = join(path_train, negative_example)
            path_pos2 = join(path_train, positive_example_2)

            positive_example_1_img = read_and_resize(path_pos1)
            negative_example_img = read_and_resize(path_neg)
            positive_example_2_img = read_and_resize(path_pos2)

            positive_example_1_img = augment(positive_example_1_img)
            negative_example_img = augment(negative_example_img)
            positive_example_2_img = augment(positive_example_2_img)

            list_positive_examples_1.append(positive_example_1_img)
            list_negative_examples.append(negative_example_img)
            list_positive_examples_2.append(positive_example_2_img)

        A = preprocess_input(np.array(list_positive_examples_1))
        B = preprocess_input(np.array(list_positive_examples_2))
        C = preprocess_input(np.array(list_negative_examples))

        # No targets: the triplet loss is attached via add_loss, so Keras
        # only needs the three named inputs.
        label = None

        yield {'anchor_input': A, 'positive_input': B, 'negative_input': C}, label
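
The generator also references a few globals (image_size, batch_size, path_train, path_csv, embedding_dim) that are set elsewhere in my script, roughly along these lines (the exact values and paths here are illustrative):

image_size = 300                    # EfficientNetB3's native resolution
batch_size = 16
embedding_dim = 128
path_train = '/data/reid/train'     # hypothetical paths
path_csv = '/data/reid/labels.csv'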

This is how I create the model:

def get_model():
    base_model = efn.EfficientNetB3(weights='imagenet', include_top=False)
    # Freeze the whole backbone; some layers are unfrozen later in __main__.
    for layer in base_model.layers:
        layer.trainable = False

    x = base_model.output
    x = Dropout(0.6)(x)
    x = Dense(embedding_dim)(x)
    # L2-normalize the embeddings so that distances are bounded.
    x = Lambda(lambda x: K.l2_normalize(x, axis=1), name="enc_out")(x)

    embedding_model = Model(base_model.input, x, name="embedding")

    input_shape = (image_size, image_size, 3)

    anchor_input = Input(input_shape, name='anchor_input')
    positive_input = Input(input_shape, name='positive_input')
    negative_input = Input(input_shape, name='negative_input')

    # All three branches share the same embedding model (shared weights).
    anchor_embedding = embedding_model(anchor_input)
    positive_embedding = embedding_model(positive_input)
    negative_embedding = embedding_model(negative_input)

    inputs = [anchor_input, positive_input, negative_input]
    outputs = [anchor_embedding, positive_embedding, negative_embedding]

    triplet_model = Model(inputs, outputs)
    # Attach the loss to the model itself; compile() is called with loss=None.
    triplet_model.add_loss(K.mean(triplet_loss(outputs)))

    return embedding_model, triplet_model
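
For completeness, triplet_loss is not shown above; a minimal sketch of the standard margin-based formulation over the three embeddings (this sketch and its margin value are assumptions, not necessarily my exact function) is:

def triplet_loss(inputs, margin=0.4):
    # inputs = [anchor, positive, negative] batches of L2-normalized embeddings.
    anchor, positive, negative = inputs
    pos_dist = K.sum(K.square(anchor - positive), axis=-1)
    neg_dist = K.sum(K.square(anchor - negative), axis=-1)
    # Hinge: require the negative to sit at least `margin` farther away
    # from the anchor than the positive does.
    return K.maximum(pos_dist - neg_dist + margin, 0.0)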

And this is how I am trying to run the training:

if __name__ == '__main__':
    data = pd.read_csv(path_csv)
    train, test = train_test_split(data, train_size=0.7, random_state=1337)
    file_id_mapping_train = {k: v for k, v in zip(train.Image.values, train.Id.values)}
    file_id_mapping_test = {k: v for k, v in zip(test.Image.values, test.Id.values)}

    gen_tr = gen(SampleGen(file_id_mapping_train))
    gen_te = gen(SampleGen(file_id_mapping_test))

    embedding_model, triplet_model = get_model()

    # Inspect layer indices to decide where to unfreeze.
    for i, layer in enumerate(embedding_model.layers):
        print(i, layer.name, layer.trainable)

    # Unfreeze everything from layer 379 on; keep the earlier layers frozen.
    for layer in embedding_model.layers[379:]:
        layer.trainable = True
    for layer in embedding_model.layers[:379]:
        layer.trainable = False

    # loss=None because the triplet loss was added with add_loss in get_model().
    triplet_model.compile(loss=None, optimizer=Adam(0.0001))
    history = triplet_model.fit(x=gen_tr,
                                validation_data=gen_te,
                                epochs=10,
                                verbose=1,
                                steps_per_epoch=200,
                                validation_steps=20,
                                callbacks=create_callbacks())
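
create_callbacks() is likewise not shown; a hypothetical version (names and settings are placeholders, not my actual helper) might look like:

def create_callbacks():
    # Hypothetical helper -- the original post does not define it.
    from tensorflow.keras.callbacks import (EarlyStopping, ModelCheckpoint,
                                            ReduceLROnPlateau)
    return [
        ModelCheckpoint('triplet_model.h5', monitor='val_loss',
                        save_best_only=True, save_weights_only=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3),
        EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True),
    ]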

The CSV contains two columns (Image, Id), and I am generating triplets on the fly with the generator above. Layer 379 is the last layer of the network, so I just leave that part trainable. I let it run for some epochs and it does not seem to converge: the loss stays around 2.30, and by around epoch 20 the loss is even higher than what I started with. Here you can see what I mean: [training-loss plot] Is there anything wrong with the way I am thinking about the problem?

Thanks!
