PaddleFluid and Kaggle Dogs vs. Cats

Introduction to PaddleFluid

Earlier posts covered how to do image classification with PaddlePaddle and how to pair it with VisualDL for metric visualization. Today I will try the same image classification task with PaddleFluid. One caveat: PaddleFluid is updated at a very fast pace. The code here was written against 0.13.0; I have seen more convenient APIs on branches of the official GitHub repo that are not usable yet, but I will point out the newer styles along the way. Fluid's documentation is still sparse, so you often have to read the source to work out the Python interface.
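
All the snippets below assume the usual imports. The module paths are my reading of the 0.13-era layout, where the image helpers live in paddle.dataset.image:

import os
import random

import paddle
import paddle.fluid as fluid
from paddle.dataset import image   # load_image / simple_transform helpers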

The Model

ResNet needs no introduction to anyone who has worked on images: Kaiming He's "Deep Residual Learning for Image Recognition" (published at the end of 2015 and already at 9,496 citations) is the work that first pushed networks past 1,000 layers. I will skip the theory; plenty of articles cover it, and a quick search will turn them up.

There is no need to write out every ResNet layer in Fluid by hand: the PaddlePaddle repo already contains an implementation that can be reused directly.

resnet.py

Dogs vs. Cats

The dataset is the Dogs vs. Cats competition on Kaggle. After installing kaggle-api, run kaggle competitions download -c dogs-vs-cats to download it. In the experiments below, 80% of the training set is used for training and the remaining 20% as the validation set.
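
Note that the Kaggle archive extracts to a flat train/ folder of files named cat.0.jpg, dog.0.jpg, and so on, while the reader below expects one subdirectory per class. Here is a one-off sketch to rearrange it (the paths are my assumptions; adjust to your setup):

import os
import shutil

src = "train"                      # flat folder from the Kaggle zip
dst = "data/dogs-vs-cats/train"    # layout expected by dataset_reader below
for name in os.listdir(src):
    if not name.endswith(".jpg"):
        continue
    cls = name.split(".")[0]       # the "cat"/"dog" prefix becomes the class dir
    cls_dir = os.path.join(dst, cls)
    if not os.path.isdir(cls_dir):
        os.makedirs(cls_dir)
    shutil.move(os.path.join(src, name), os.path.join(cls_dir, name))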

Image Reader

def default_mapper(sample):
    img, label = sample
    # resize the short side to 256, crop to 224, subtract the channel means
    img = image.simple_transform(
        img, 256, 224, True, mean=[103.94, 116.78, 123.68])
    return img.flatten().astype('float32'), label


def dataset_reader(data_dir, train_val_ratio=0.8):
    img_list = []
    img2label = dict()
    label2id = dict()

    # note: join with data_dir so the isdir check works from any cwd
    sub_dirs = [i for i in os.listdir(data_dir)
                if os.path.isdir(os.path.join(data_dir, i))]
    for index, sub_dir in enumerate(sub_dirs):
        label2id[sub_dir] = index
        sub_files = []
        for root, dirs, files in os.walk(os.path.join(data_dir, sub_dir)):
            # accumulate (the original snippet reassigned here and tested
            # against the single string "jpg, jpeg", dropping every file)
            sub_files += [os.path.join(root, file) for file in files
                          if file.split(".")[-1] in ["jpg", "jpeg"]]
        img_list += sub_files
        for file in sub_files:
            img2label[file] = sub_dir

    random.shuffle(img_list)
    train_len = int(train_val_ratio * len(img_list))
    train_img_list = img_list[:train_len]
    val_img_list = img_list[train_len:]

    def train_reader():
        for idx, imgfile in enumerate(train_img_list):
            try:
                data = image.load_image(imgfile)
                label = [label2id[img2label[imgfile]], ]
                yield [data, label]
            except Exception as e:
                print "error info: {0}".format(str(e))
                continue

    def test_reader():
        for idx, imgfile in enumerate(val_img_list):
            try:
                data = image.load_image(imgfile)
                label = [label2id[img2label[imgfile]], ]
                yield [data, label]
            except Exception as e:
                print "error info: {0}".format(str(e))
                continue

    return paddle.reader.map_readers(default_mapper, train_reader), \
           paddle.reader.map_readers(default_mapper, test_reader)

The dataset_reader function has two main parts:

  1. walk all the images and build the label mappings;
  2. load the images and construct the train and test generators (a quick sanity check of the returned readers follows this list).
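
A minimal sketch to confirm the readers behave as expected (the data path is whatever layout you prepared above):

train_reader, test_reader = dataset_reader("data/dogs-vs-cats/train")
for i, (img, label) in enumerate(train_reader()):
    # default_mapper flattens the 3x224x224 crop into 150528 float32 values
    print img.shape, label   # (150528,), [0] or [1]
    if i == 2:
        break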

Building the Model

def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
    conv1 = fluid.layers.conv2d(
        input=input,
        filter_size=filter_size,
        num_filters=ch_out,
        stride=stride,
        padding=padding,
        act=None,
        bias_attr=False)
    return fluid.layers.batch_norm(input=conv1, act=act)


def shortcut(input, ch_out, stride):
    ch_in = input.shape[1]  # assumes data_format == 'NCHW'
    if ch_in != ch_out:
        # 1x1 conv to match channels when the residual branch changes width
        return conv_bn_layer(input, ch_out, 1, stride, 0, None)
    else:
        return input


def basicblock(input, ch_out, stride):
    short = shortcut(input, ch_out, stride)
    conv1 = conv_bn_layer(input, ch_out, 3, stride, 1)
    conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, act=None)
    return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')


def bottleneck(input, ch_out, stride):
    short = shortcut(input, ch_out * 4, stride)
    conv1 = conv_bn_layer(input, ch_out, 1, stride, 0)
    conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1)
    conv3 = conv_bn_layer(conv2, ch_out * 4, 1, 1, 0, act=None)
    return fluid.layers.elementwise_add(x=short, y=conv3, act='relu')


def layer_warp(block_func, input, ch_out, count, stride):
    res_out = block_func(input, ch_out, stride)
    for i in range(1, count):
        res_out = block_func(res_out, ch_out, 1)
    return res_out


def resnet(input, class_dim, depth=18, data_format='NCHW'):
    cfg = {
        # the original snippet had [2, 2, 2, 1]; the paper's ResNet-18 is [2, 2, 2, 2]
        18: ([2, 2, 2, 2], basicblock),
        34: ([3, 4, 6, 3], basicblock),
        50: ([3, 4, 6, 3], bottleneck),
        101: ([3, 4, 23, 3], bottleneck),
        152: ([3, 8, 36, 3], bottleneck)
    }
    stages, block_func = cfg[depth]
    conv1 = conv_bn_layer(input, ch_out=64, filter_size=7, stride=2, padding=3)
    pool1 = fluid.layers.pool2d(
        input=conv1, pool_type='avg', pool_size=3, pool_stride=2)
    res1 = layer_warp(block_func, pool1, 64, stages[0], 1)
    res2 = layer_warp(block_func, res1, 128, stages[1], 2)
    res3 = layer_warp(block_func, res2, 256, stages[2], 2)
    res4 = layer_warp(block_func, res3, 512, stages[3], 2)
    pool2 = fluid.layers.pool2d(
        input=res4,
        pool_size=7,
        pool_type='avg',
        pool_stride=1,
        global_pooling=True)
    out = fluid.layers.fc(input=pool2, size=class_dim, act='softmax')
    return out

resnet() configures ResNets of different depths, such as resnet50, resnet34, and resnet101, using Fluid APIs that map directly onto the model structure. Classic models generally have public reimplementations, so a quick search will turn one up for whatever you want to use; it is still worth understanding how it works, which is not hard with the paper at hand, but I will not dig deeper here.
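
As a quick sketch of how it is wired up (the depth argument picks the variant, and class_dim is 2 for cats and dogs; the variable name here is hypothetical):

# build a ResNet-50 classifier head for two classes
image_var = fluid.layers.data(name='image', shape=[3, 224, 224], dtype='float32')
predict = resnet(image_var, class_dim=2, depth=50)   # [batch, 2] softmax output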

Training

def train(args):
    # logger = LogWriter(args.logdir, sync_cycle=10000)
    model = resnet
    class_dim = args.class_dim
    if args.data_format == 'NCHW':
        dshape = [3, 224, 224]
    else:
        dshape = [224, 224, 3]
    if not args.data_path:
        raise Exception("Must specify --data_path for training")
    train_reader, test_reader = dataset_reader(args.data_path)

    def train_network():
        input = fluid.layers.data(name='image', shape=dshape, dtype='float32')
        predict = model(input, class_dim)
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        cost = fluid.layers.cross_entropy(input=predict, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        batch_acc = fluid.layers.accuracy(input=predict, label=label)
        return [avg_cost, batch_acc]

    optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)

    batched_train_reader = paddle.batch(
        paddle.reader.shuffle(train_reader, buf_size=5120),
        batch_size=args.batch_size)
    batched_test_reader = paddle.batch(
        test_reader, batch_size=args.batch_size)

    def event_handler(event):
        if isinstance(event, fluid.EndStepEvent):
            print('Pass: {0}, Step: {1}, Metric: {2}'.format(
                event.epoch, event.step, event.metrics))
        if isinstance(event, fluid.EndEpochEvent):
            avg_cost, acc = trainer.test(
                reader=batched_test_reader, feed_order=['image', 'label'])
            print('Pass: {0}, val avg_cost: {1}, acc: {2}'.format(
                event.epoch, avg_cost, acc))
            # save the model parameters at the end of each epoch
            # (the loss and acc could also be written to a VisualDL file here)
            trainer.save_params("./ckpt")

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    trainer = fluid.Trainer(
        train_func=train_network, optimizer=optimizer, place=place)
    print("Begin to Train")
    trainer.train(
        reader=batched_train_reader,
        num_epochs=args.pass_num,
        event_handler=event_handler,
        feed_order=['image', 'label'])

train() mainly covers:

  1. building the model: the ResNet part with its various layers, reusing the model definition from the previous section;
  2. building the training pieces: configuring the inputs and outputs (input, label) and constructing ops such as cost and acc;
  3. wrapping train_reader and test_reader into batch readers;
  4. configuring the device and creating a Trainer to start training;
  5. saving the model at the end of each epoch, which still uses the v2 style. Fluid on GitHub already supports passing a CheckpointConfig to the Trainer for this, but the 0.13.0 build I installed from pip does not yet include that change (I checked the installed sources), so I stick with save_params here. Personally I prefer the CheckpointConfig style of configuration; see the sketch after this list.
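
Based on my reading of the development branch at the time, the newer style looks roughly like the following. This is a sketch only: CheckpointConfig is not in the pip 0.13.0 build, and the argument names may differ from what finally ships.

# sketch of the CheckpointConfig style from the development branch
# (assumption: not usable in the pip 0.13.0 build)
ckpt_config = fluid.CheckpointConfig(
    checkpoint_dir='./ckpt',   # where checkpoints are written
    max_num_checkpoints=3,     # keep only the most recent few
    epoch_interval=1,          # save at the end of every epoch
    step_interval=100)         # and every 100 steps within an epoch
trainer = fluid.Trainer(
    train_func=train_network,
    optimizer=optimizer,
    place=place,
    checkpoint_config=ckpt_config)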

Training Log

[Figure: training log output]


A problem surfaced during training: GPU utilization jumped around a lot and frequently dropped to 0, which suggested the GPU was waiting on input. Looking at the code, paddle.reader.map_readers(default_mapper, train_reader) uses no worker threads, so a single thread spends too long reading and preprocessing each image while the GPU sits idle. After switching to paddle.reader.xmap_readers(default_mapper, train_reader, cpu_count(), 51200), training ran much faster, although utilization still visibly fluctuates. The source below shows why: the data loading is implemented in Python (and the process_num workers are actually threads, so they contend for the GIL), which is not very efficient. With a single card this is tolerable; with multiple cards the waiting would be much more pronounced. A better replacement would implement the reading logic in C++ at a lower level, which would be far more efficient.
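
The change inside dataset_reader is essentially a one-liner (a before/after sketch; cpu_count comes from the multiprocessing module):

from multiprocessing import cpu_count

# before: one thread decodes and preprocesses every image
reader = paddle.reader.map_readers(default_mapper, train_reader)
# after: cpu_count() worker threads feeding a 51200-sample buffer
reader = paddle.reader.xmap_readers(default_mapper, train_reader,
                                    cpu_count(), 51200)

For reference, here is the xmap_readers implementation in 0.13.0: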

def xmap_readers(mapper, reader, process_num, buffer_size, order=False):
    end = XmapEndSignal()

    # define a worker to read samples from reader to in_queue
    def read_worker(reader, in_queue):
        for i in reader():
            in_queue.put(i)
        in_queue.put(end)

    # define a worker to read samples from reader to in_queue with order flag
    def order_read_worker(reader, in_queue):
        in_order = 0
        for i in reader():
            in_queue.put((in_order, i))
            in_order += 1
        in_queue.put(end)

    # define a worker to handle samples from in_queue by mapper
    # and put mapped samples into out_queue
    def handle_worker(in_queue, out_queue, mapper):
        sample = in_queue.get()
        while not isinstance(sample, XmapEndSignal):
            r = mapper(sample)
            out_queue.put(r)
            sample = in_queue.get()
        in_queue.put(end)
        out_queue.put(end)

    # define a worker to handle samples from in_queue by mapper
    # and put mapped samples into out_queue by order
    def order_handle_worker(in_queue, out_queue, mapper, out_order):
        ins = in_queue.get()
        while not isinstance(ins, XmapEndSignal):
            order, sample = ins
            r = mapper(sample)
            while order != out_order[0]:
                pass
            out_queue.put(r)
            out_order[0] += 1
            ins = in_queue.get()
        in_queue.put(end)
        out_queue.put(end)

    def xreader():
        in_queue = Queue(buffer_size)
        out_queue = Queue(buffer_size)
        out_order = [0]
        # start a read worker in a thread
        target = order_read_worker if order else read_worker
        t = Thread(target=target, args=(reader, in_queue))
        t.daemon = True
        t.start()
        # start several handle_workers
        target = order_handle_worker if order else handle_worker
        args = (in_queue, out_queue, mapper, out_order) if order else (
            in_queue, out_queue, mapper)
        workers = []
        for i in xrange(process_num):
            worker = Thread(target=target, args=args)
            worker.daemon = True
            workers.append(worker)
        for w in workers:
            w.start()

        sample = out_queue.get()
        while not isinstance(sample, XmapEndSignal):
            yield sample
            sample = out_queue.get()
        finish = 1
        while finish < process_num:
            sample = out_queue.get()
            if isinstance(sample, XmapEndSignal):
                finish += 1
            else:
                yield sample

    return xreader

Image Augmentation

So far I have only gotten the pipeline running, without basic processing such as image augmentation; with augmentation the results should improve, so let's test it here.

Reading the code above, the augmentation can be implemented in the default_mapper step. Here is an attempt:

# extra imports used by the augmentation code
import math
import functools
from multiprocessing import cpu_count

import numpy as np
from PIL import Image, ImageEnhance

DATA_DIM = 224

img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))


def resize_short(img, target_size):
    # scale the short side to target_size, keeping the aspect ratio
    percent = float(target_size) / min(img.size[0], img.size[1])
    resized_width = int(round(img.size[0] * percent))
    resized_height = int(round(img.size[1] * percent))
    img = img.resize((resized_width, resized_height), Image.LANCZOS)
    return img


def crop_image(img, target_size, center):
    # center or random crop of target_size x target_size
    width, height = img.size
    size = target_size
    if center:
        w_start = (width - size) / 2
        h_start = (height - size) / 2
    else:
        w_start = random.randint(0, width - size)
        h_start = random.randint(0, height - size)
    w_end = w_start + size
    h_end = h_start + size
    img = img.crop((w_start, h_start, w_end, h_end))
    return img


def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
    # random-area crop with a random aspect ratio, then resize to size
    aspect_ratio = math.sqrt(random.uniform(*ratio))
    w = 1. * aspect_ratio
    h = 1. / aspect_ratio
    bound = min((float(img.size[0]) / img.size[1]) / (w**2),
                (float(img.size[1]) / img.size[0]) / (h**2))
    scale_max = min(scale[1], bound)
    scale_min = min(scale[0], bound)
    target_area = img.size[0] * img.size[1] * random.uniform(scale_min,
                                                             scale_max)
    target_size = math.sqrt(target_area)
    w = int(target_size * w)
    h = int(target_size * h)
    i = random.randint(0, img.size[0] - w)
    j = random.randint(0, img.size[1] - h)
    img = img.crop((i, j, i + w, j + h))
    img = img.resize((size, size), Image.LANCZOS)
    return img


def rotate_image(img):
    # small random rotation in [-10, 10] degrees
    angle = random.randint(-10, 10)
    img = img.rotate(angle)
    return img


def distort_color(img):
    # apply brightness/contrast/color jitter in a random order
    def random_brightness(img, lower=0.5, upper=1.5):
        e = random.uniform(lower, upper)
        return ImageEnhance.Brightness(img).enhance(e)

    def random_contrast(img, lower=0.5, upper=1.5):
        e = random.uniform(lower, upper)
        return ImageEnhance.Contrast(img).enhance(e)

    def random_color(img, lower=0.5, upper=1.5):
        e = random.uniform(lower, upper)
        return ImageEnhance.Color(img).enhance(e)

    ops = [random_brightness, random_contrast, random_color]
    random.shuffle(ops)
    img = ops[0](img)
    img = ops[1](img)
    img = ops[2](img)
    return img


def process_image(sample, mode, color_jitter, rotate):
    # note: with this mapper the readers should yield the image *path*
    # instead of the decoded array, so the image is opened here
    img_path = sample[0]
    img = Image.open(img_path)
    if mode == 'train':
        if rotate:
            img = rotate_image(img)
        img = random_crop(img, DATA_DIM)
    else:
        img = resize_short(img, target_size=256)
        img = crop_image(img, target_size=DATA_DIM, center=True)
    if mode == 'train':
        if color_jitter:
            img = distort_color(img)
        if random.randint(0, 1) == 1:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
    img -= img_mean
    img /= img_std
    if mode == 'train' or mode == 'val':
        return img, sample[1]
    elif mode == 'test':
        return [img]

Then replace the mappers at the end of dataset_reader. Note that the validation mapper needs mode='val' so it keeps its labels, and the partial has to fill in all of process_image's arguments; color_jitter and rotate are left off here, since random cropping and flipping alone already help:

train_mapper = functools.partial(
    process_image, mode='train', color_jitter=False, rotate=False)
val_mapper = functools.partial(
    process_image, mode='val', color_jitter=False, rotate=False)

return paddle.reader.xmap_readers(train_mapper, train_reader, cpu_count(), 51200), \
       paddle.reader.xmap_readers(val_mapper, test_reader, cpu_count(), 5120)

Here we can compare the validation-set results before and after image augmentation:

[Figure: validation metrics before and after image augmentation]


Clearly, the results improve further once image augmentation is in place.

All the source code is kept up to date in paddle-101. This is only a basic demo and I have not pushed the score any further; if you are interested, try using Fluid to climb the leaderboard.

