OOM error when running image style transfer with larger images
Source: 6-9 Image style transfer training loop implementation
Erlla
2019-06-18
The content image is 2000x1126 and the style image is 1500x1730.
Training on an RTX 2070 with 8 GB of VRAM; num_steps is 15, and after producing one output image the run fails with an OOM error.
Here is the code:
import os
import time

import numpy as np
import tensorflow as tf
from PIL import Image

# Per-channel means (in BGR order) used to preprocess VGG16 inputs
VGG_MEAN = [103.939, 116.779, 123.68]
class VGGNet:
    def __init__(self, data_dict):
        self.data_dict = data_dict  # pretrained VGG16 parameters loaded from vgg16.npy

    def get_conv_filter(self, name):
        # Fetch the convolution kernel of layer `name`
        return tf.constant(self.data_dict[name][0], name='conv')

    def get_fc_weight(self, name):
        # Fetch the weight matrix of fully connected layer `name`
        return tf.constant(self.data_dict[name][0], name='fc')

    def get_bias(self, name):
        # Fetch the bias vector of layer `name`
        return tf.constant(self.data_dict[name][1], name='bias')

    def conv_layer(self, x, name):
        """Create a convolution layer.

        x: input tensor of shape [batch, height, width, channel]
        name: layer name, used to look up the pretrained parameters
        """
        with tf.name_scope(name):
            conv_w = self.get_conv_filter(name)  # fetch the kernel
            conv_b = self.get_bias(name)         # fetch the bias
            h = tf.nn.conv2d(x, conv_w, [1, 1, 1, 1], padding='SAME')  # [1,1,1,1]: stride per dimension
            h = tf.nn.bias_add(h, conv_b)
            # The two steps above replace tf.layers.conv2d() so the
            # pretrained kernels can be plugged in directly
            h = tf.nn.relu(h)
            return h

    def pooling_layer(self, x, name):
        # Max pooling; pooling layers need no VGG16 parameters
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name=name)

    def fc_layer(self, x, name, activation=tf.nn.relu):
        # Fully connected layer with an optional activation function
        with tf.name_scope(name):
            fc_w = self.get_fc_weight(name)
            fc_b = self.get_bias(name)
            h = tf.matmul(x, fc_w)
            h = tf.nn.bias_add(h, fc_b)
            if activation is None:
                return h
            return activation(h)

    def flatten_layer(self, x, name):
        with tf.name_scope(name):
            # x_shape: [batch, height, width, channel]
            x_shape = x.get_shape().as_list()
            dim = 1
            for d in x_shape[1:]:
                dim *= d  # multiply the last three dimensions together
            x = tf.reshape(x, [-1, dim])
            return x
    def build(self, x_rgb):
        start_time = time.time()
        print('building model ......')
        # Convert RGB to BGR and subtract the per-channel VGG means
        r, g, b = tf.split(x_rgb, [1, 1, 1], axis=3)
        x_bgr = tf.concat([b - VGG_MEAN[0],
                           g - VGG_MEAN[1],
                           r - VGG_MEAN[2]], axis=3)
        # assert x_bgr.get_shape().as_list()[1:] == [224, 224, 3]
        # Rebuild the convolutional part of VGG16
        # stage 1
        self.conv1_1 = self.conv_layer(x_bgr, 'conv1_1')
        self.conv1_2 = self.conv_layer(self.conv1_1, 'conv1_2')
        self.pool1 = self.pooling_layer(self.conv1_2, 'pool1')
        # stage 2
        self.conv2_1 = self.conv_layer(self.pool1, 'conv2_1')
        self.conv2_2 = self.conv_layer(self.conv2_1, 'conv2_2')
        self.pool2 = self.pooling_layer(self.conv2_2, 'pool2')
        # stage 3
        self.conv3_1 = self.conv_layer(self.pool2, 'conv3_1')
        self.conv3_2 = self.conv_layer(self.conv3_1, 'conv3_2')
        self.conv3_3 = self.conv_layer(self.conv3_2, 'conv3_3')
        self.pool3 = self.pooling_layer(self.conv3_3, 'pool3')
        # stage 4
        self.conv4_1 = self.conv_layer(self.pool3, 'conv4_1')
        self.conv4_2 = self.conv_layer(self.conv4_1, 'conv4_2')
        self.conv4_3 = self.conv_layer(self.conv4_2, 'conv4_3')
        self.pool4 = self.pooling_layer(self.conv4_3, 'pool4')
        # stage 5
        self.conv5_1 = self.conv_layer(self.pool4, 'conv5_1')
        self.conv5_2 = self.conv_layer(self.conv5_1, 'conv5_2')
        self.conv5_3 = self.conv_layer(self.conv5_2, 'conv5_3')
        self.pool5 = self.pooling_layer(self.conv5_3, 'pool5')
        '''
        # flatten
        self.flatten5 = self.flatten_layer(self.pool5, 'flatten')
        # fully connected
        self.fc6 = self.fc_layer(self.flatten5, 'fc6')
        self.fc7 = self.fc_layer(self.fc6, 'fc7')
        self.fc8 = self.fc_layer(self.fc7, 'fc8', activation=None)  # fc8 has no activation
        self.prob = tf.nn.softmax(self.fc8, name='prob')
        '''
        print('building finished: %4ds' % (time.time() - start_time))
vgg_16_npy_path = './vgg16.npy'
content_img_path = './source_image/content.jpg'  # path of the content image
style_image_path = './source_image/style.png'    # path of the style image
num_steps = 15       # number of style-transfer training steps
learning_rate = 15   # learning rate
lambda_c = 0.1       # coefficient of the content loss
lambda_s = 500       # coefficient of the style loss
output_dir = './run_style_transfer'  # output directory
if not os.path.exists(output_dir):
    os.mkdir(output_dir)

def initial_result(shape, mean, stddev):
    # The result image is a trainable variable initialised with Gaussian noise
    initial = tf.truncated_normal(shape, mean=mean, stddev=stddev)
    return tf.Variable(initial)

def read_img(img_name):
    img = Image.open(img_name)
    np_img = np.array(img)
    np_img = np.asarray([np_img], dtype=np.int32)  # add a batch dimension
    print(np_img.shape)
    return np_img
# Compute a Gram matrix; used for the style loss
def gram_matrix(x):
    """
    x: [1, height, width, channel]
    """
    b, h, w, ch = x.get_shape().as_list()
    features = tf.reshape(x, [b, h * w, ch])
    # adjoint_a=True transposes `features`, so the product has shape
    # [b, ch, ch]; normalise by the number of entries per channel map
    gram = tf.matmul(features, features, adjoint_a=True) / tf.constant(ch * w * h, tf.float32)
    return gram
def get_image_shape(img_name):
    # np.array(PIL image) has shape (height, width, channels)
    img = Image.open(img_name)
    np_img = np.array(img)
    img_height = np_img.shape[0]
    img_width = np_img.shape[1]
    print(np_img.shape)
    return img_height, img_width

content_img_height, content_img_width = get_image_shape(content_img_path)
style_img_height, style_img_width = get_image_shape(style_image_path)

# Initial result image: Gaussian noise with mean 127.5 and stddev 20
result = initial_result((1, content_img_height, content_img_width, 3), 127.5, 20)

# Load the content and style images
content_val = read_img(content_img_path)
style_val = read_img(style_image_path)

content = tf.placeholder(tf.float32, shape=[1, content_img_height, content_img_width, 3])
style = tf.placeholder(tf.float32, shape=[1, style_img_height, style_img_width, 3])
data_dict = np.load(vgg_16_npy_path, encoding='latin1').item()  # on NumPy >= 1.16.3 this also needs allow_pickle=True

# Three copies of the network: one each for the content image, the style
# image and the trainable result image
vgg_for_content = VGGNet(data_dict)
vgg_for_style = VGGNet(data_dict)
vgg_for_result = VGGNet(data_dict)
vgg_for_content.build(content)
vgg_for_style.build(style)
vgg_for_result.build(result)

# Content features of the content image
content_feature = [
    vgg_for_content.conv1_2,
    # vgg_for_content.conv2_2,
    # vgg_for_content.conv3_3,
    # vgg_for_content.conv4_3,
    # vgg_for_content.conv5_3
]
# Content features of the result image
result_content_feature = [
    vgg_for_result.conv1_2,
    # vgg_for_result.conv2_2,
    # vgg_for_result.conv3_3,
    # vgg_for_result.conv4_3,
    # vgg_for_result.conv5_3
]
# Style features of the style image
style_feature = [
    # vgg_for_style.conv1_2,
    # vgg_for_style.conv2_2,
    # vgg_for_style.conv3_3,
    vgg_for_style.conv4_3,
    # vgg_for_style.conv5_3
]
style_gram = [gram_matrix(feature) for feature in style_feature]
# Style features of the result image
result_style_feature = [
    # vgg_for_result.conv1_2,
    # vgg_for_result.conv2_2,
    # vgg_for_result.conv3_3,
    vgg_for_result.conv4_3,
    # vgg_for_result.conv5_3
]
# Gram matrices of the result image, used for the style loss
result_style_gram = [gram_matrix(feature) for feature in result_style_feature]

# Content loss: mean squared difference of the content features
content_loss = tf.zeros(1, tf.float32)
for c, c_ in zip(content_feature, result_content_feature):
    content_loss += tf.reduce_mean((c - c_) ** 2, [1, 2, 3])

# Style loss: mean squared difference of the Gram matrices
style_loss = tf.zeros(1, tf.float32)
for s, s_ in zip(style_gram, result_style_gram):
    style_loss += tf.reduce_mean((s - s_) ** 2, [1, 2])

loss = content_loss * lambda_c + style_loss * lambda_s
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    for step in range(num_steps):
        loss_value, content_loss_value, style_loss_value, _ = sess.run(
            [loss, content_loss, style_loss, train_op],
            feed_dict={content: content_val, style: style_val})
        print('step:%d, loss_value:%8.4f, content_loss:%8.4f, style_loss:%8.4f'
              % (step + 1, loss_value[0], content_loss_value[0], style_loss_value[0]))
        # Save the current result image after every step
        result_img_path = os.path.join(output_dir, 'result-%05d.jpg' % (step + 1))
        result_val = result.eval(session=sess)[0]
        result_val = np.clip(result_val, 0, 255)
        img_arr = np.asarray(result_val, np.uint8)
        img = Image.fromarray(img_arr)  # convert the array back to an image
        img.save(result_img_path)       # save the image
Here is the error output:
---------------------------------------------------------------------------
ResourceExhaustedError Traceback (most recent call last)
E:\anaconda\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1291 try:
-> 1292 return fn(*args)
1293 except errors.OpError as e:
E:\anaconda\lib\site-packages\tensorflow\python\client\session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
1276 return self._call_tf_sessionrun(
-> 1277 options, feed_dict, fetch_list, target_list, run_metadata)
1278
E:\anaconda\lib\site-packages\tensorflow\python\client\session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
1366 self._session, options, feed_dict, fetch_list, target_list,
-> 1367 run_metadata)
1368
ResourceExhaustedError: OOM when allocating tensor with shape[1,64,1730,1500] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[{{node conv1_2_1/Conv2D}} = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](conv1_1_1/Relu, conv1_2/conv)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
During handling of the above exception, another exception occurred:
ResourceExhaustedError Traceback (most recent call last)
<ipython-input-5-84df424d3c3a> in <module>
5 sess.run(init_op) # 运行sess
6 for step in range(num_steps): # loss content_loss等为计算目标
----> 7 loss_value, content_loss_value, style_loss_value,_ =sess.run([loss, content_loss, style_loss, train_op],feed_dict={content:content_val,style:style_val})
8 print('step:%d, loss_value:%8.4f,content_value:%8.4f,style_loss:%8.4f' \
9 %(step+1, loss_value[0], content_loss_value[0], style_loss_value[0]))
E:\anaconda\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
885 try:
886 result = self._run(None, fetches, feed_dict, options_ptr,
--> 887 run_metadata_ptr)
888 if run_metadata:
889 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
E:\anaconda\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1108 if final_fetches or final_targets or (handle and feed_dict_tensor):
1109 results = self._do_run(handle, final_targets, final_fetches,
-> 1110 feed_dict_tensor, options, run_metadata)
1111 else:
1112 results = []
E:\anaconda\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1284 if handle is None:
1285 return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1286 run_metadata)
1287 else:
1288 return self._do_call(_prun_fn, handle, feeds, fetches)
E:\anaconda\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1306 self._config.experimental.client_handles_error_formatting):
1307 message = error_interpolation.interpolate(message, self._graph)
-> 1308 raise type(e)(node_def, op, message)
1309
1310 def _extend_graph(self):
ResourceExhaustedError: OOM when allocating tensor with shape[1,64,1730,1500] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[{{node conv1_2_1/Conv2D}} = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](conv1_1_1/Relu, conv1_2/conv)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
Caused by op 'conv1_2_1/Conv2D', defined at:
File "E:\anaconda\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "E:\anaconda\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "E:\anaconda\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "E:\anaconda\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
app.start()
File "E:\anaconda\lib\site-packages\ipykernel\kernelapp.py", line 505, in start
self.io_loop.start()
File "E:\anaconda\lib\site-packages\tornado\platform\asyncio.py", line 132, in start
self.asyncio_loop.run_forever()
File "E:\anaconda\lib\asyncio\base_events.py", line 528, in run_forever
self._run_once()
File "E:\anaconda\lib\asyncio\base_events.py", line 1764, in _run_once
handle._run()
File "E:\anaconda\lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
File "E:\anaconda\lib\site-packages\tornado\ioloop.py", line 758, in _run_callback
ret = callback()
File "E:\anaconda\lib\site-packages\tornado\stack_context.py", line 300, in null_wrapper
return fn(*args, **kwargs)
File "E:\anaconda\lib\site-packages\tornado\gen.py", line 1233, in inner
self.run()
File "E:\anaconda\lib\site-packages\tornado\gen.py", line 1147, in run
yielded = self.gen.send(value)
File "E:\anaconda\lib\site-packages\ipykernel\kernelbase.py", line 357, in process_one
yield gen.maybe_future(dispatch(*args))
File "E:\anaconda\lib\site-packages\tornado\gen.py", line 326, in wrapper
yielded = next(result)
File "E:\anaconda\lib\site-packages\ipykernel\kernelbase.py", line 267, in dispatch_shell
yield gen.maybe_future(handler(stream, idents, msg))
File "E:\anaconda\lib\site-packages\tornado\gen.py", line 326, in wrapper
yielded = next(result)
File "E:\anaconda\lib\site-packages\ipykernel\kernelbase.py", line 534, in execute_request
user_expressions, allow_stdin,
File "E:\anaconda\lib\site-packages\tornado\gen.py", line 326, in wrapper
yielded = next(result)
File "E:\anaconda\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "E:\anaconda\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "E:\anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 2819, in run_cell
raw_cell, store_history, silent, shell_futures)
File "E:\anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 2845, in _run_cell
return runner(coro)
File "E:\anaconda\lib\site-packages\IPython\core\async_helpers.py", line 67, in _pseudo_sync_runner
coro.send(None)
File "E:\anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 3020, in run_cell_async
interactivity=interactivity, compiler=compiler, result=result)
File "E:\anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_ast_nodes
if (yield from self.run_code(code, result)):
File "E:\anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-4-c6ef382594f4>", line 58, in <module>
vgg_for_style.build(style)
File "<ipython-input-2-717411ff30c8>", line 70, in build
self.conv1_2 = self.conv_layer(self.conv1_1, 'conv1_2')
File "<ipython-input-2-717411ff30c8>", line 23, in conv_layer
h = tf.nn.conv2d(x, conv_w, [1,1,1,1],padding='SAME') # [1,1,1,1]在各个维度上卷积的步长
File "E:\anaconda\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 1044, in conv2d
data_format=data_format, dilations=dilations, name=name)
File "E:\anaconda\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "E:\anaconda\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "E:\anaconda\lib\site-packages\tensorflow\python\framework\ops.py", line 3272, in create_op
op_def=op_def)
File "E:\anaconda\lib\site-packages\tensorflow\python\framework\ops.py", line 1768, in __init__
self._traceback = tf_stack.extract_stack()
ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[1,64,1730,1500] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[{{node conv1_2_1/Conv2D}} = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](conv1_1_1/Relu, conv1_2/conv)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
1 Answer
-
Hi, this is caused by the images you are using being too large. On a single GPU the only fix is to reduce the image size. Otherwise you could consider splitting the model across GPUs; look up how tf.device is used.
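As a rough sanity check on the numbers: the tensor named in the traceback, shape [1, 64, 1730, 1500] in float32, already takes 64 × 1730 × 1500 × 4 bytes ≈ 664 MB, and the script builds three VGG graphs (content, style, result), each holding a dozen or so feature maps of similar scale, plus the gradient buffers Adam keeps for the result image, so 8 GB fills up quickly.

A minimal sketch of the resizing approach, assuming you cap the longer edge at 512 px (the helper name read_img_resized and the max_side value are illustrative choices, not part of the course code); convert('RGB') also guards against a 4-channel PNG style image:

from PIL import Image
import numpy as np

def read_img_resized(img_name, max_side=512):
    # Shrink the image so its longer edge is at most max_side pixels;
    # lower max_side further if OOM persists.
    img = Image.open(img_name).convert('RGB')
    w, h = img.size
    scale = max_side / max(w, h)
    if scale < 1.0:
        img = img.resize((int(w * scale), int(h * scale)), Image.LANCZOS)
    np_img = np.asarray(img, dtype=np.float32)
    return np_img[np.newaxis, ...]  # shape [1, height, width, 3]

If you load images this way, the placeholder shapes and the initial result variable must be derived from the resized arrays (e.g. from content_val.shape) so all shapes stay consistent.

If you do have a second GPU, tf.device can pin parts of the graph to different devices. A rough sketch, replacing the three build(...) calls in the script above and assuming two visible GPUs; which split balances memory best depends on your machine:

with tf.device('/gpu:0'):
    vgg_for_content.build(content)  # content and result branches on GPU 0
    vgg_for_result.build(result)
with tf.device('/gpu:1'):
    vgg_for_style.build(style)      # style branch on GPU 1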
2020-04-16