The story of one failure

This is one way to get an acceptable result in the competition

TGS Salt Identification Challenge
Segment salt deposits beneath the Earth's surface


The ideas and the code are laid out here. The result, however, brought nothing but experience and knowledge.

All the data, both train and test, are uint8, so all the augmentations are prepared in that same space.
Also, to get scale reduction in the augmentations you have to start from larger source crops: to compress the rock, take a region larger than 32x32 out of the 101x101 image and shrink it; correspondingly, take a region smaller than 32x32 and stretch it.
The third idea is that shifts of the Earth's crust are described quite accurately by a sigmoid:
if the left side does not move while the rock on the right moves upward,
the rock in between is displaced along a sigmoid.
The closer to an edge, the more the movement resembles that edge's movement (a small sketch of this blend follows right below).
Pixel intensities were not changed; only the mean of each 32x32 image was brought to 0.
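
A minimal sketch of that blend (a hypothetical helper for illustration, not part of the pipeline below):

import numpy as np

def edge_blend(left_shift, right_shift, width=32, curvature=5.0):
    # displacement profile between two edges: near the left edge it
    # approaches left_shift, near the right edge right_shift,
    # with a sigmoid transition in between
    x = np.arange(width, dtype=float)
    s = 1.0 / (1.0 + np.exp(-curvature * (x - width / 2.0) / width))
    return left_shift + (right_shift - left_shift) * s

# left side static, right side shifted up by 10 pixels
profile = edge_blend(0.0, 10.0)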


from __future__ import print_function

import os
import numpy as np
import pandas as pd

import pylab
import pickle
from tqdm import tqdm
from tqdm import tqdm_notebook 

%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.colors import NoNorm

import skimage.io as io

from keras.models import Model, load_model
from keras.layers import Conv2D, MaxPooling2D, Conv2DTranspose, Dropout
from keras.layers import Input, BatchNormalization, Activation
from keras.layers import UpSampling2D, Concatenate

from keras.losses import binary_crossentropy
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras import backend as K

import tensorflow as tf

from keras.preprocessing.image import load_img

rows, cols = 101, 101
w_rows, w_cols = 32, 32
n_folds = 5


train_df = pd.read_csv("/data/ssd1/salt_ident/train.csv", index_col="id", usecols=[0])
depths_df = pd.read_csv("/data/ssd1/salt_ident/depths.csv", index_col="id")
train_df = train_df.join(depths_df)
test_df = depths_df[~depths_df.index.isin(train_df.index)]
train_df["masks"] = [np.array(np.rint(np.sign(load_img("/data/ssd1/salt_ident/train/masks/{}.png".format(idx), grayscale=True))).astype('int')) for idx in tqdm_notebook(train_df.index)]
train_df["images"] = [np.array(load_img("/data/ssd1/salt_ident/train/images/{}.png".format(idx), grayscale=True)).astype('int') for idx in tqdm_notebook(train_df.index)]

Helper routines for checking image and mask properties and for tiling 101x101 images into 32x32 fragments:
def cov_to_class(val):    
    for i in range(0, 11):
        if val * 10 <= i :
            return i

def test_empty(img):
    return np.mean(img) > 0

def test_line(mask):
    for j in range(cols-1):
        if all(mask[2:-2,j+1] == 1) and all( mask[2:-2,j] == 0) :
            return False
        if all(mask[2:-2,j+1] == 0) and all( mask[2:-2,j] == 1) :
            return False
    return True

def fragment(img, p):
    # cut fragment p (0..15) out of a 4x4 grid of 32x32 windows;
    # the last row/column of windows is aligned to the far edge
    _r = p//4
    _c = p%4
    if _r < 3 :
        if _c < 3:
            return img[_c * w_cols: (_c + 1) * w_cols, _r * w_rows: (_r+1)*w_rows,: ]
        else:
            return img[-w_cols:, _r * w_rows: (_r+1)*w_rows, : ]
    else:
        if _c < 3:
            return img[_c * w_cols: (_c + 1) * w_cols, -w_rows:, :]
        else:
            return img[-w_cols:, -w_rows:, :]

def seq_101_32(img_seq):
    _t = np.zeros((img_seq.shape[0]*16, w_cols, w_rows, 1), dtype=img_seq.dtype)
    for k in range(img_seq.shape[0]):
        for kk in range(16):
            _t[k*16+kk,:,:,:] = fragment(img_seq[k], kk)    
    return _t
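
fragment() walks a 4x4 grid of 32x32 windows: the first three along each axis start at offsets 0, 32, 64, and the fourth is aligned to the far edge, so it overlaps its neighbor. A quick check of the window origins (my own illustration, not part of the pipeline):

# window origins along one axis of a 101x101 image
origins = [k * w_rows for k in range(3)] + [rows - w_rows]
print(origins)  # [0, 32, 64, 69] -- the last window overlaps the previous one by 27 px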

# add columns with various labels; this later makes it easier
# to form the folds and to handle
# empty images or masks bounded by straight lines

train_df["coverage"] = train_df.masks.map(np.sum) / pow(cols, 2)
train_df["coverage_class"] = train_df.coverage.map(cov_to_class)
train_df["empty"] = train_df.images.map(test_empty)
train_df['line'] = train_df.masks.map(test_line)

Stratify by mask coverage: sort by coverage class, then deal the folds out cyclically so each fold gets a similar class distribution.
train_df.sort_values('coverage_class', inplace=True)
train_df['fold'] = (list(range(n_folds))*train_df.shape[0])[:train_df.shape[0]]


train_df.head()

id z masks images coverage coverage_class empty line fold

575d24d81d 843 [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,… [[134, 131, 134, 137, 136, 136, 136, 134, 123,… 0.0 0 True True 0
77ecd76754 99 [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,… [[86, 97, 109, 140, 168, 185, 194, 207, 203, 1… 0.0 0 True True 1
6940237693 777 [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,… [[118, 121, 126, 128, 127, 120, 110, 102, 100,… 0.0 0 True True 2
40b45f1871 748 [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,… [[153, 154, 148, 143, 136, 129, 109, 101, 107,… 0.0 0 True True 3
f78361aa48 606 [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,… [[126, 136, 145, 150, 152, 150, 142, 131, 118,… 0.0 0 True True 4

The accuracy and loss routines are taken from Kaggle kernels.
def dice_coef(y_true, y_pred):
    y_true_f = K.flatten(y_true)
    y_pred = K.cast(y_pred, 'float32')
    y_pred_f = K.cast(K.greater(K.flatten(y_pred), 0.5), 'float32')
    intersection = y_true_f * y_pred_f
    score = 2. * K.sum(intersection) / (K.sum(y_true_f) + K.sum(y_pred_f))
    return score

def dice_loss(y_true, y_pred):
    smooth = 1.
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = y_true_f * y_pred_f
    score = (2. * K.sum(intersection) + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
    return 1. - score

def bce_dice_loss(y_true, y_pred):
    return binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)
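
A toy sanity check of dice_loss (my own example): two masks that each have two positive pixels and overlap in one give a smoothed dice score of (2*1+1)/(2+2+1) = 0.6, i.e. a dice_loss of 0.4:

y_true = K.constant([1., 1., 0., 0.])
y_pred = K.constant([1., 0., 1., 0.])
print(K.eval(dice_loss(y_true, y_pred)))  # ~0.4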

#https://www.kaggle.com/cpmpml/fast-iou-metric-in-numpy-and-tensorflow
def get_iou_vector(A, B):
    # Numpy version
    
    batch_size = A.shape[0]
    metric = 0.0
    for batch in range(batch_size):
        t, p = A[batch], B[batch]
        true = np.sum(t)
        pred = np.sum(p)
        
        # deal with empty mask first
        if true == 0:
            metric += (pred == 0)
            continue
        
        # non empty mask case.  Union is never empty 
        # hence it is safe to divide by its number of pixels
        intersection = np.sum(t * p)
        union = true + pred - intersection
        iou = intersection / union
        
        # iou metric is a stepwise approximation of the real iou over 0.5
        iou = np.floor(max(0, (iou - 0.45)*20)) / 10
        
        metric += iou
        
    # take the average over all images in the batch
    metric /= batch_size
    return metric


def my_iou_metric(label, pred):
    # Tensorflow version
    return tf.py_func(get_iou_vector, [label, pred > 0.5], tf.float64)
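
The floor(...) line above converts the raw IoU into the competition's stepwise metric: ten thresholds 0.5, 0.55, ..., 0.95, one tenth of a point per threshold cleared. A quick numeric check (my own):

iou = 0.63                                       # clears 0.50, 0.55 and 0.60
print(np.floor(max(0, (iou - 0.45) * 20)) / 10)  # 0.3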


from keras.utils.generic_utils import get_custom_objects

get_custom_objects().update({'bce_dice_loss': bce_dice_loss })
get_custom_objects().update({'dice_loss': dice_loss })
get_custom_objects().update({'dice_coef': dice_coef })
get_custom_objects().update({'my_iou_metric': my_iou_metric })

A simple, plain vanilla U-Net; the code is also taken from Kaggle.
With ResNet blocks the result improves, but as we will see below, that doesn't matter.

def conv_block(m, dim, acti, bn, res, do=0):
    n = Conv2D(dim, 3, activation=acti, padding='same')(m)
    n = BatchNormalization()(n) if bn else n
    n = Dropout(do)(n) if do else n
    n = Conv2D(dim, 3, activation=acti, padding='same')(n)
    n = BatchNormalization()(n) if bn else n
    return Concatenate()([m, n]) if res else n

def level_block(m, dim, depth, inc, acti, do, bn, mp, up, res):
    if depth > 0:
        n = conv_block(m, dim, acti, bn, res)
        m = MaxPooling2D()(n) if mp else Conv2D(dim, 3, strides=2, padding='same')(n)
        m = level_block(m, int(inc*dim), depth-1, inc, acti, do, bn, mp, up, res)
        if up:
            m = UpSampling2D()(m)
            m = Conv2D(dim, 2, activation=acti, padding='same')(m)
        else:
            m = Conv2DTranspose(dim, 3, strides=2, activation=acti, padding='same')(m)
        n = Concatenate()([n, m])
        m = conv_block(n, dim, acti, bn, res)
    else:
        m = conv_block(m, dim, acti, bn, res, do)
    return m


def UNet(img_shape, out_ch=1, start_ch=32, depth=4, inc_rate=2., activation='relu', 
         dropout=0.5, batchnorm=False, maxpool=True, upconv=False, residual=False):
    i = Input(shape=img_shape)
    o = level_block(i, start_ch, depth, inc_rate, activation, dropout, batchnorm, maxpool, upconv, residual)
    o = Conv2D(out_ch, 1, activation='sigmoid')(o)
    return Model(inputs=i, outputs=o)
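
One reason 32x32 tiles are convenient: with depth=4 the encoder halves the spatial size four times, so the input side must be divisible by 2^4 for the skip connections to align, and 32 is. A quick check (my own sketch, not from the original post):

side = w_cols
for _ in range(4):          # four MaxPooling2D levels at depth=4
    assert side % 2 == 0, "skip connections would misalign"
    side //= 2
print(side)  # 2 -- bottleneck resolution for a 32x32 input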



The size-augmentation block. A cell for visually checking it appears further below. It runs fast. I tried various augmentations, but this particular choice is dictated by the shape of rock displacement. The coordinates of the top line and of the bottom line are computed (this could be simplified by assuming the two sigmoids are identical), then a sampling grid is derived from them and the 32x32 image is built over that grid. The top-left corner (the corner with the smallest coordinates) is chosen at random, and the remaining corners are derived from it. p[0] is the sigmoid curvature;

(p[1],p[2]) is the top-left corner, (p[3],p[4]) the top-right, (p[5],p[6]) the bottom-right, (p[7],p[8]) the bottom-left.

So the sampling region is a quadrilateral of this kind.

# size-augmentation routines
from joblib import Parallel, delayed
import math
from scipy import ndimage


def sigmd(x):
    # logistic sigmoid; plain float() avoids the removed np.float alias
    return 1 / (1 + math.exp(-float(x)))

def aug_img_cv(_xy, p):    
    C = np.zeros((w_cols, w_rows), dtype='float')
    R = np.zeros((w_cols, w_rows), dtype='float')
    
    # top edge: sigmoid profile for one coordinate, linear for the other
    a1 = [p[1] + (p[3] - p[1])* sigmd(p[0]*(x-w_cols/2.)/float(w_cols)) for x in np.arange(0.,float(w_cols),1.) ]
    a2 = [p[2] + (p[4] - p[2])* x/float(w_cols) for x in np.arange(0.,float(w_cols),1.) ]
    # bottom edge: the same shape between the bottom corners
    b1 = [p[7] + (p[5] - p[7])* sigmd(p[0]*(x-w_cols/2.)/float(w_cols)) for x in np.arange(0.,float(w_cols),1.) ]
    b2 = [p[8] + (p[6] - p[8])* x/float(w_cols) for x in np.arange(0.,float(w_cols),1.) ]
    # per-pixel step from the top edge towards the bottom edge
    ba1= np.subtract(b1, a1)/w_cols
    ba2= np.subtract(b2, a2)/w_rows

    # fill the sampling grids consumed by ndimage.map_coordinates
    for i in range(w_cols):
        C[:,i] = np.array([ (ba1[i])*j + a1[i] for j in np.arange(0.,float(w_cols),1.) ])
        R[:,i] = np.array([ (ba2[i])*j + a2[i] for j in np.arange(0.,float(w_rows),1.) ])
            
    _t = np.array(list(_xy)[0]).squeeze().astype('float')
    _zx = ndimage.map_coordinates(_t, [C, R], order=1)
    _t = np.array(list(_xy)[1]).squeeze().astype('float')
    _zy = ndimage.map_coordinates(_t, [C, R], order=1)

    return np.rint(_zx), np.rint(_zy)

co_size = w_cols*0.1

def augment(_x, _y):
    parallel = Parallel(3, verbose=0)
    p = np.zeros((_x.shape[0],11), dtype='float')
    p[:,0] = np.random.sample(size=_x.shape[0]) * 8. + 1. # sigmoid 
    
    p[:,1] = np.random.sample(size=_x.shape[0]) *( cols - w_cols + co_size )  # cols left top
    p[:,2] = np.random.sample(size=_x.shape[0]) *( rows - w_rows + co_size )  # rows left top   
    
    #p[:,3] = np.random.sample(size=_x.shape[0]) * 2.*co_size - co_size + p[:,1]
    b = np.maximum(p[:,1] - co_size,0)
    a = p[:,1] + co_size
    p[:,3] = np.random.sample(size=_x.shape[0]) * ( b - a ) + a
    b = np.minimum(p[:,2] + w_rows + co_size, rows)
    a = p[:,2] + w_rows - co_size
    p[:,4] = np.random.sample(size=_x.shape[0]) * ( b - a ) + a
    
#    p[:,5] = np.random.sample(size=_x.shape[0]) * 2.*co_size - co_size + p[:,1] + w_cols
#    p[:,6] = np.random.sample(size=_x.shape[0]) * 2.*co_size - co_size + p[:,2] + w_rows

    b = np.minimum(p[:,1] + w_cols + co_size, cols)
    a = p[:,1] + w_cols - co_size
    p[:,7] = np.random.sample(size=_x.shape[0]) * ( b - a ) + a
    b = np.maximum(p[:,2] - co_size,0)
    a = p[:,2] + co_size
    p[:,8] = np.random.sample(size=_x.shape[0]) * ( b - a ) + a

#    p[:,5] = p[:,3] + float(w_rows)
#    p[:,7] = p[:,1] + float(w_rows)
    # the bottom-right corner completes a parallelogram:
    # top-right corner plus the (top-left -> bottom-left) offset
    p[:,5] = p[:,3] + (p[:,7] - p[:,1])
    p[:,6] = p[:,8] + (p[:,4] - p[:,2])
    
    p[:,9] = np.random.sample(size=_x.shape[0]) + 0.33  # gamma 0.33..1.33 (computed but not used here)
    p[:,10] = np.random.sample(size=_x.shape[0])*5. - 10. # color shift -10..-5 (computed but not used here)

#
# if the quadrilateral sticks out of the 101x101 bounds, shift it back inside
#
    for k in range(_x.shape[0]):
        if p[k,5] > cols:
            p[k,[1,3,5,7]] -= (p[k,5] - cols)
        if p[k,6] > rows:
            p[k,[2,4,6,8]] -= (p[k,6] - rows)
            
        
#    print (' prob ready ')
    _txy = parallel(delayed(aug_img_cv)(xy,p[idx,:]) for idx,xy in enumerate(zip(_x,_y)))
    _tx = np.array(_txy)[:,0,:,:].reshape(-1,w_cols,w_cols,1)
    _ty = np.array(_txy)[:,1,:,:].reshape(-1,w_cols,w_cols,1)
    
    return _tx.reshape(-1,w_cols,w_rows,1), _ty.reshape(-1,w_cols,w_rows,1)
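
A quick usage sketch for augment() on random stand-in data (mine, not from the original notebook), just to confirm the output shapes:

dummy_x = np.random.randint(0, 255, size=(4, cols, rows, 1)).astype('float')
dummy_y = np.random.randint(0, 2, size=(4, cols, rows, 1)).astype('float')
ax_, ay_ = augment(dummy_x, dummy_y)
print(ax_.shape, ay_.shape)  # (4, 32, 32, 1) (4, 32, 32, 1)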


The main program: data preparation, visualization, and training.
from sklearn.utils import shuffle

def mean_x(_img):
    # set the per-image mean to 0, then scale to roughly -1..+1
    _t0 = _img.squeeze().astype('float')
    _t0 = _t0 - round(np.mean(_t0))
    _t = _t0 / 255.
#    _t = _t + 1
    return np.expand_dims(_t,axis=-1)

#models = []

net_size = 16
lr_init = 1e-4
batch_size = 25


lr = lr_init
model = UNet((w_cols,w_rows,1),start_ch=64,depth=4)
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=[my_iou_metric])
#model.summary()

#
# take only images/masks that are non-empty and not bounded by straight lines
#

fold_id = 0
valid_XX = np.array(train_df['images'][(train_df.fold == fold_id) & train_df["line"].values  & train_df["empty"].values].tolist() )
valid_YY = np.array(train_df['masks' ][(train_df.fold == fold_id) & train_df["line"].values  & train_df["empty"].values].tolist() )
train_XX = np.array(train_df['images'][(train_df.fold != fold_id) & train_df["line"].values  & train_df["empty"].values].tolist() )
train_YY = np.array(train_df['masks' ][(train_df.fold != fold_id) & train_df["line"].values  & train_df["empty"].values].tolist() )
valid_XX = valid_XX.reshape(-1, cols, rows, 1)
valid_YY = valid_YY.reshape(-1, cols, rows, 1)
train_XX = train_XX.reshape(-1, cols, rows, 1)
train_YY = train_YY.reshape(-1, cols, rows, 1)
    
#
# convert the 101x101 sequences into 32x32 fragments
#
val_32 = seq_101_32(valid_XX)
val_x = np.array([mean_x(x) for x in val_32])
val_y = seq_101_32( valid_YY.astype('float'))


#
# eyeball the result
#
i_shift = valid_XX.shape[0]//2
fig, axes = plt.subplots(4,8,figsize=(12,6))
for idx in range(16):    
    k = idx+i_shift*16
    axes[idx%4,idx//4].set_axis_off()
    axes[idx%4,idx//4].imshow(val_x[k].squeeze(), cmap='gray')
    axes[idx%4,idx//4 + 4].set_axis_off()
    axes[idx%4,idx//4 + 4].imshow(val_y[k].squeeze(), cmap='gray')
        
fig, axes = plt.subplots(1,2,figsize=(12,12))
axes[0].set_axis_off()
axes[0].imshow(valid_XX[i_shift].squeeze(), cmap='gray')
axes[1].set_axis_off()
axes[1].imshow(valid_YY[i_shift].squeeze(), cmap='gray')
plt.show(block=True)


#
# likewise convert train from 101x101 sequences to 32x32
# and take a look
#
train_x = seq_101_32(train_XX)
train_y = seq_101_32(train_YY)

i_shift = train_XX.shape[0]//2
fig, axes = plt.subplots(4,8,figsize=(12,6))
for idx in range(16):
    k = idx+i_shift*16
    axes[idx%4,idx//4].set_axis_off()
    axes[idx%4,idx//4].imshow(train_x[k].squeeze(), cmap='gray')
    axes[idx%4,idx//4 + 4].set_axis_off()
    axes[idx%4,idx//4 + 4].imshow(train_y[k].squeeze(), cmap='gray') # , norm=NoNorm()
        
fig, axes = plt.subplots(1,2,figsize=(12,12))
axes[0].set_axis_off()
axes[0].imshow(train_XX[i_shift].squeeze(), cmap='gray')
axes[1].set_axis_off()
axes[1].imshow(train_YY[i_shift].squeeze(), cmap='gray')

plt.show(block=True)

#
# append the size augmentations
# to the original 32x32 train set
#
for k in tqdm_notebook(range(4)):
    train_x1, train_y1 = augment(train_XX, train_YY)
    train_x = np.append(train_x, train_x1, axis=0)
    train_y = np.append(train_y, train_y1, axis=0)
        
#
# scale train to -1..+1 and set each image's mean to 0
#
train_x = np.array([mean_x(x) for x in train_x ])

#
# shuffle: the images were ordered by salt coverage
#
train_x, train_y = shuffle(train_x, train_y)


early_stopping = EarlyStopping(monitor='val_my_iou_metric', mode = 'max',patience=10, verbose=1)
model_checkpoint = ModelCheckpoint('./keras_model.h5',monitor='val_my_iou_metric', 
                               mode = 'max', save_best_only=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_my_iou_metric', mode = 'max',factor=0.2, patience=5, min_lr=0.00001, verbose=1)  # note: defined but not passed to fit() below
#reduce_lr = ReduceLROnPlateau(factor=0.2, patience=5, min_lr=0.00001, verbose=1)
K.set_value(model.optimizer.lr, 1e-4)

history = model.fit(train_x, train_y,
                        validation_data=[val_x, val_y], 
                        epochs=200,
                        batch_size=25,
                        callbacks=[early_stopping, model_checkpoint],
                        verbose=2)
100% 4/4 [00:11<00:00, 2.93s/it]

Train on 60580 samples, validate on 12352 samples
Epoch 1/200
 - 153s - loss: 0.3079 - my_iou_metric: 0.7524 - val_loss: 0.2199 - val_my_iou_metric: 0.8102

Epoch 00001: val_my_iou_metric improved from -inf to 0.81018, saving model to accu_1d_32_salt_model_fo_0.h5
Epoch 2/200
 - 153s - loss: 0.2171 - my_iou_metric: 0.8089 - val_loss: 0.1823 - val_my_iou_metric: 0.8429

Epoch 00002: val_my_iou_metric improved from 0.81018 to 0.84287, saving model to accu_1d_32_salt_model_fo_0.h5
Epoch 3/200
 - 154s - loss: 0.1879 - my_iou_metric: 0.8299 - val_loss: 0.1698 - val_my_iou_metric: 0.8525

Epoch 00003: val_my_iou_metric improved from 0.84287 to 0.85253, saving model to accu_1d_32_salt_model_fo_0.h5
Epoch 4/200
 - 155s - loss: 0.1681 - my_iou_metric: 0.8447 - val_loss: 0.1666 - val_my_iou_metric: 0.8470

Epoch 00004: val_my_iou_metric did not improve
Epoch 5/200
 - 155s - loss: 0.1561 - my_iou_metric: 0.8544 - val_loss: 0.1487 - val_my_iou_metric: 0.8630

Epoch 00005: val_my_iou_metric improved from 0.85253 to 0.86303, saving model to accu_1d_32_salt_model_fo_0.h5
Epoch 6/200
 - 155s - loss: 0.1416 - my_iou_metric: 0.8626 - val_loss: 0.1708 - val_my_iou_metric: 0.8610

Epoch 00006: val_my_iou_metric did not improve
Epoch 7/200
 - 155s - loss: 0.1278 - my_iou_metric: 0.8733 - val_loss: 0.1501 - val_my_iou_metric: 0.8690

Epoch 00007: val_my_iou_metric improved from 0.86303 to 0.86895, saving model to accu_1d_32_salt_model_fo_0.h5
Epoch 8/200
 - 155s - loss: 0.1180 - my_iou_metric: 0.8798 - val_loss: 0.1502 - val_my_iou_metric: 0.8728

Epoch 00008: val_my_iou_metric improved from 0.86895 to 0.87284, saving model to accu_1d_32_salt_model_fo_0.h5
Epoch 9/200
 - 155s - loss: 0.1014 - my_iou_metric: 0.8898 - val_loss: 0.1502 - val_my_iou_metric: 0.8706

Epoch 00009: val_my_iou_metric did not improve
Epoch 10/200
 - 155s - loss: 0.0865 - my_iou_metric: 0.8986 - val_loss: 0.1787 - val_my_iou_metric: 0.8722

Epoch 00010: val_my_iou_metric did not improve
Epoch 11/200
 - 155s - loss: 0.0753 - my_iou_metric: 0.9064 - val_loss: 0.2077 - val_my_iou_metric: 0.8680

Epoch 00011: val_my_iou_metric did not improve
Epoch 12/200
 - 155s - loss: 0.0660 - my_iou_metric: 0.9129 - val_loss: 0.1935 - val_my_iou_metric: 0.8736

Epoch 00012: val_my_iou_metric improved from 0.87284 to 0.87357, saving model to accu_1d_32_salt_model_fo_0.h5
Epoch 13/200
 - 155s - loss: 0.0574 - my_iou_metric: 0.9175 - val_loss: 0.2133 - val_my_iou_metric: 0.8749

Epoch 00013: val_my_iou_metric improved from 0.87357 to 0.87489, saving model to accu_1d_32_salt_model_fo_0.h5
Epoch 14/200
 - 155s - loss: 0.0543 - my_iou_metric: 0.9206 - val_loss: 0.1960 - val_my_iou_metric: 0.8726

Epoch 00014: val_my_iou_metric did not improve
Epoch 15/200
 - 155s - loss: 0.0463 - my_iou_metric: 0.9269 - val_loss: 0.2079 - val_my_iou_metric: 0.8685

Epoch 00015: val_my_iou_metric did not improve
Epoch 16/200
 - 155s - loss: 0.0438 - my_iou_metric: 0.9283 - val_loss: 0.1992 - val_my_iou_metric: 0.8499

Epoch 00016: val_my_iou_metric did not improve
Epoch 17/200
 - 155s - loss: 0.0385 - my_iou_metric: 0.9330 - val_loss: 0.2063 - val_my_iou_metric: 0.8727

Epoch 00017: val_my_iou_metric did not improve
Epoch 18/200
 - 155s - loss: 0.0369 - my_iou_metric: 0.9347 - val_loss: 0.2721 - val_my_iou_metric: 0.8709

Epoch 00018: val_my_iou_metric did not improve
Epoch 19/200
 - 155s - loss: 0.0350 - my_iou_metric: 0.9364 - val_loss: 0.2365 - val_my_iou_metric: 0.8778

Epoch 00019: val_my_iou_metric improved from 0.87489 to 0.87775, saving model to accu_1d_32_salt_model_fo_0.h5
Epoch 20/200
 - 155s - loss: 0.0324 - my_iou_metric: 0.9404 - val_loss: 0.2457 - val_my_iou_metric: 0.8747

Epoch 00020: val_my_iou_metric did not improve
Epoch 21/200
 - 155s - loss: 0.0279 - my_iou_metric: 0.9434 - val_loss: 0.2258 - val_my_iou_metric: 0.8644

Epoch 00021: val_my_iou_metric did not improve
Epoch 22/200
 - 155s - loss: 0.0298 - my_iou_metric: 0.9430 - val_loss: 0.2216 - val_my_iou_metric: 0.8498

Epoch 00022: val_my_iou_metric did not improve
Epoch 23/200
 - 155s - loss: 0.0261 - my_iou_metric: 0.9472 - val_loss: 0.2718 - val_my_iou_metric: 0.8675

Epoch 00023: val_my_iou_metric did not improve
Epoch 24/200
 - 155s - loss: 0.0244 - my_iou_metric: 0.9485 - val_loss: 0.2498 - val_my_iou_metric: 0.8731

Epoch 00024: val_my_iou_metric did not improve
Epoch 25/200
 - 155s - loss: 0.0248 - my_iou_metric: 0.9499 - val_loss: 0.2345 - val_my_iou_metric: 0.8588

Epoch 00025: val_my_iou_metric did not improve
Epoch 26/200
 - 155s - loss: 0.0221 - my_iou_metric: 0.9532 - val_loss: 0.2865 - val_my_iou_metric: 0.8676

Epoch 00026: val_my_iou_metric did not improve
Epoch 27/200
 - 155s - loss: 0.0206 - my_iou_metric: 0.9558 - val_loss: 0.2889 - val_my_iou_metric: 0.8626

Epoch 00027: val_my_iou_metric did not improve
Epoch 28/200
 - 155s - loss: 0.0200 - my_iou_metric: 0.9554 - val_loss: 0.2068 - val_my_iou_metric: 0.8639

Epoch 00028: val_my_iou_metric did not improve
Epoch 29/200
 - 155s - loss: 0.0197 - my_iou_metric: 0.9584 - val_loss: 0.2815 - val_my_iou_metric: 0.8675

Epoch 00029: val_my_iou_metric did not improve
Epoch 00029: early stopping


Summary


If you enlarge the network and take a model with ResNet blocks from the same Kaggle kernels, you can get a val_my_iou_metric of around 0.93, which is quite decent. There are papers on seismic processing out there, and an acceptable result there for a 58x58x12 cube is at least 0.95.
But here the main problem began: stitching the 32x32 tiles back into 101x101 did not work out.
Every approach I tried gave an LB score of no more than 0.75.
Since the ideas for stitching had run out, it became clear the problem was in the gamma and the intensity.
So I redid everything at 64x64, and that is the subject of the next post.
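
For reference, a minimal sketch of one such stitching approach (a reconstruction of the idea, not the exact code that was tried): average the 16 predicted tiles back onto a 101x101 canvas, dividing by the per-pixel coverage count. The seams and the per-tile normalization are exactly where this falls apart:

def stitch_32_101(tiles):
    # inverse of seq_101_32 for one image: average 16 predicted
    # 32x32 tiles back onto a 101x101 canvas
    canvas = np.zeros((cols, rows), dtype='float')
    count = np.zeros((cols, rows), dtype='float')
    for p in range(16):
        _r, _c = p // 4, p % 4
        r0 = _r * w_rows if _r < 3 else rows - w_rows
        c0 = _c * w_cols if _c < 3 else cols - w_cols
        canvas[c0:c0 + w_cols, r0:r0 + w_rows] += tiles[p].squeeze()
        count[c0:c0 + w_cols, r0:r0 + w_rows] += 1.
    return canvas / count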

This cell is for visually checking the size-augmentation routine.
# visual test of the augmentation
# visually check the size-augmentation routine:
# straight edges on the left and right, sigmoids on the top and bottom.
# the size and position are chosen at random.
# a grid is drawn onto the image so the augmentation is clearly visible.

import cv2

fold_id = 0

valid_XX = np.array(train_df['images'][(train_df.fold == fold_id) & train_df["line"].values].tolist() )
valid_YY = np.array(train_df['masks' ][(train_df.fold == fold_id) & train_df["line"].values].tolist() )
train_XX = np.array(train_df['images'][(train_df.fold != fold_id) & train_df["line"].values].tolist() )
train_YY = np.array(train_df['masks' ][(train_df.fold != fold_id) & train_df["line"].values].tolist() )

valid_XX = valid_XX.reshape(-1, cols, rows, 1)
valid_YY = valid_YY.reshape(-1, cols, rows, 1)
train_XX = train_XX.reshape(-1, cols, rows, 1)
train_YY = train_YY.reshape(-1, cols, rows, 1)

print ("XX ", train_XX.dtype, train_XX.shape, train_YY.dtype, train_YY.shape)
print ("YY ", train_YY.dtype, train_YY.shape, valid_YY.dtype, valid_YY.shape)
# same augmentation code as above, instrumented for visualization
from joblib import Parallel, delayed
import math
from scipy import ndimage


def sigmd(x):
    return 1 / (1 + math.exp(-float(x)))

def aug_img_cv(_xy, p):    
    print (p.astype('int'))
    C = np.zeros((w_cols, w_rows), dtype='float')
    R = np.zeros((w_cols, w_rows), dtype='float')
    
    a1 = [p[1] + (p[3] - p[1])* sigmd(p[0]*(x-w_cols/2.)/float(w_cols)) for x in np.arange(0.,float(w_cols),1.)]
    a2 = [p[2] + (p[4] - p[2])* x/float(w_cols) for x in np.arange(0.,float(w_cols),1.)]
    b1 = [p[7] + (p[5] - p[7])* sigmd(p[0]*(x-w_cols/2.)/float(w_cols)) for x in np.arange(0.,float(w_cols),1.) ]
    b2 = [p[8] + (p[6] - p[8])* x/float(w_cols) for x in np.arange(0.,float(w_cols),1.)]
    ba1= np.subtract(b1, a1)/w_cols
    ba2= np.subtract(b2, a2)/w_rows

    for i in range(w_cols):
        C[:,i] = np.array([ (ba1[i])*j + a1[i] for j in np.arange(0.,float(w_cols),1.)])
        R[:,i] = np.array([ (ba2[i])*j + a2[i] for j in np.arange(0.,float(w_rows),1.)])
            
    _t = np.array(list(_xy)[0]).squeeze().astype('float')
###

    _t[50,:] = 255
    _t[:,50] = 255
    _t[25,:] = 255
    _t[:,25] = 255
    _t[75,:] = 255
    _t[:,75] = 255

    _im = np.zeros((cols, rows,3), dtype='uint8')
    _im[:,:,0] = _t
    _im[50,:,2] = 255
    _im[:,50,2] = 255
    # mark the warped sampling grid in the green channel
    for i in range(w_cols):
        for j in range(w_rows):
            _im[int(C[i,j])%cols,int(R[i,j])%rows,1] = 125
#
####
    _zx = ndimage.map_coordinates(_t, [C, R], order=1)
    _t = np.array(list(_xy)[1]).squeeze().astype('float')
    _zy = ndimage.map_coordinates(_t, [C, R], order=1)

#####    
#    pts = p[[2,1,4,3,6,5,8,7]].reshape((-1,1,2)).astype('int')
#    cv2.polylines(_im,[pts],True,(0,0,255))
    fig, axes = plt.subplots(1,2,figsize=(10,5))
    axes[0].set_axis_off()
    axes[0].imshow(_im, cmap='gray')
    axes[1].set_axis_off()
    axes[1].imshow(_zx, cmap='gray')
    plt.show(block=True)
#####

    return np.rint(_zx), np.rint(_zy)

co_size = w_cols*0.1

def augment(_x, _y):
    parallel = Parallel(3, verbose=0)
    p = np.zeros((_x.shape[0],11), dtype='float')
    p[:,0] = np.random.sample(size=_x.shape[0]) * 8. + 1. # sigmoid 
    
    p[:,1] = np.random.sample(size=_x.shape[0]) *( cols - w_cols + co_size )  # cols left top
    p[:,2] = np.random.sample(size=_x.shape[0]) *( rows - w_rows + co_size )  # rows left top   
    
    #p[:,3] = np.random.sample(size=_x.shape[0]) * 2.*co_size - co_size + p[:,1]
    b = np.maximum(p[:,1] - co_size,0)
    a = p[:,1] + co_size
    p[:,3] = np.random.sample(size=_x.shape[0]) * ( b - a ) + a
    b = np.minimum(p[:,2] + w_rows + co_size, rows)
    a = p[:,2] + w_rows - co_size
    p[:,4] = np.random.sample(size=_x.shape[0]) * ( b - a ) + a
    
#    p[:,5] = np.random.sample(size=_x.shape[0]) * 2.*co_size - co_size + p[:,1] + w_cols
#    p[:,6] = np.random.sample(size=_x.shape[0]) * 2.*co_size - co_size + p[:,2] + w_rows

    b = np.minimum(p[:,1] + w_cols + co_size, cols)
    a = p[:,1] + w_cols - co_size
    p[:,7] = np.random.sample(size=_x.shape[0]) * ( b - a ) + a
    b = np.maximum(p[:,2] - co_size,0)
    a = p[:,2] + co_size
    p[:,8] = np.random.sample(size=_x.shape[0]) * ( b - a ) + a

#    p[:,5] = p[:,3] + float(w_rows)
#    p[:,7] = p[:,1] + float(w_rows)
    # bottom-right corner completes the parallelogram, as above
    p[:,5] = p[:,3] + (p[:,7] - p[:,1])
    p[:,6] = p[:,8] + (p[:,4] - p[:,2])
    
    p[:,9] = np.random.sample(size=_x.shape[0]) + 0.33  # gamma 0.33..1.33 (unused here)
    p[:,10] = np.random.sample(size=_x.shape[0])*5. - 10. # color shift -10..-5 (unused here)

#    p[p<0] = 0
#    p[p>=cols] = cols-1
    for k in range(_x.shape[0]):
        if p[k,5] > cols:
            p[k,[1,3,5,7]] -= (p[k,5] - cols)
        if p[k,6] > rows:
            p[k,[2,4,6,8]] -= (p[k,6] - rows)
            
        
#    print (' prob ready ')
    _txy = parallel(delayed(aug_img_cv)(xy,p[idx,:]) for idx,xy in enumerate(zip(_x,_y)))
    _tx = np.array(_txy)[:,0,:,:].reshape(-1,w_cols,w_cols,1)
    _ty = np.array(_txy)[:,1,:,:].reshape(-1,w_cols,w_cols,1)
    
    return _tx.reshape(-1,w_cols,w_rows,1), _ty.reshape(-1,w_cols,w_rows,1)

i_shift = 0
#for k in tqdm(range(1)):
#    train_x1, train_y1 = augment(train_XX, train_YY)
train_x1, train_y1 = augment(train_XX[i_shift:i_shift+4], train_YY[i_shift:i_shift+4])
