I have a resnet31 (coded in TF2.3, a subclassed tf.keras.Layer) which runs quickly when exported as a SavedModel, but when I convert the SavedModel to a tflite model, it is very slow. I used the benchmark tool provided with tflite to compute the inference time for each kind of layer, and it turns out that CONV_2D is very slow — I don't understand why.

Here is the output of the benchmark tool, you can see that it takes 16s to run CONV_2D:

```
Number of nodes executed: 89
============================== Summary by node type ==============================
(Node type) (count) (avg ms) (avg %) (cdf %) (mem KB) (times called)
CONV_2D 33 16353.751 99.844% 99.844% 0.000 33
WHILE 2 20.836 0.127% 99.971% 0.000 2
ADD 11 1.956 0.012% 99.983% 0.000 11
MAX_POOL_2D 3 1.496 0.009% 99.992% 0.000 3
REDUCE_MAX 1 0.881 0.005% 99.997% 0.000 1
FULLY_CONNECTED 13 0.339 0.002% 99.999% 0.000 13
REVERSE_V2 2 0.029 0.000% 100.000% 0.000 2
TRANSPOSE 2 0.020 0.000% 100.000% 0.000 2
FILL 4 0.016 0.000% 100.000% 0.000 4
STRIDED_SLICE 5 0.011 0.000% 100.000% 0.000 5
PACK 5 0.011 0.000% 100.000% 0.000 5
MUL 2 0.004 0.000% 100.000% 0.000 2
CONCATENATION 1 0.003 0.000% 100.000% 0.000 1
SHAPE 3 0.002 0.000% 100.000% 0.000 3
ONE_HOT 1 0.001 0.000% 100.000% 0.000 1
RESHAPE 1 0.000 0.000% 100.000% 0.000 1
```

I converted the savedmodel this way:

```
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
# BUG FIX: `(x)` without a comma is NOT a tuple — the original line assigned a
# bare enum value where the converter expects a *collection* of Optimize flags.
# NOTE: OPTIMIZE_FOR_LATENCY is deprecated and is treated like Optimize.DEFAULT.
converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_LATENCY]
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,  # enable TensorFlow Lite builtin ops.
    tf.lite.OpsSet.SELECT_TF_OPS,    # fall back to TensorFlow ops when needed.
]
tflite_model = converter.convert()
```

The resnet31 is coded this way:

```
import tensorflow as tf
from tensorflow.python.keras import Sequential
from tensorflow.python.keras.layers import Conv2D, BatchNormalization, Lambda, MaxPool2D, AveragePooling2D, Dense, Layer
from tensorflow.keras import Model
class ResnetIdentityV1(Layer):
    """ResNet V1 identity block: two 3x3 conv-bn-relu stages plus a shortcut.

    https://arxiv.org/pdf/1512.03385.pdf
    """

    def __init__(self, filters, conv_shortcut):
        """
        Parameters
        ----------
        filters
            Number of output channels for both convolutions.
        conv_shortcut
            If True, project the shortcut branch with a 1x1 conv + BN;
            otherwise the input is passed through unchanged.
        """
        super(ResnetIdentityV1, self).__init__()
        self.filters = filters
        # First 3x3 conv stage.
        self.conv1 = Conv2D(filters=filters, kernel_size=(3, 3),
                            strides=1, padding='same')
        self.bn1 = BatchNormalization()
        # Second 3x3 conv stage.
        self.conv2 = Conv2D(filters=filters, kernel_size=(3, 3),
                            strides=1, padding='same')
        self.bn2 = BatchNormalization()
        # Shortcut branch: either a 1x1 projection + BN, or a pure identity.
        self.downsample = Sequential()
        if conv_shortcut:
            self.downsample.add(Conv2D(filters=filters,
                                       kernel_size=(1, 1),
                                       strides=1))
            self.downsample.add(BatchNormalization())
        else:
            self.downsample.add(Lambda(lambda x: x))

    def call(self, inputs, training=False, **kwargs):
        """Apply the residual block.

        Parameters
        ----------
        inputs
            A 4-D tensor in NHWC layout.
        training
            Boolean forwarded to the BatchNormalization layers.
        """
        residual = self.downsample(inputs, training=training)
        # conv -> bn -> relu, twice; the second relu is applied after the add.
        out = tf.nn.relu(self.bn1(self.conv1(inputs), training=training))
        out = self.bn2(self.conv2(out), training=training)
        return tf.nn.relu(out + residual)
class ResnetBlock31V1(Layer):
    """Stack of `blocks` identity blocks for a ResNet31.

    Only the first block projects its shortcut (channel counts change there);
    the rest use a plain identity shortcut.
    https://arxiv.org/pdf/1512.03385.pdf
    """

    def __init__(self, filters, blocks):
        super(ResnetBlock31V1, self).__init__()
        self.identity_block = ResnetIdentityV1
        stack = Sequential()
        # First block: projected shortcut so channel counts match.
        stack.add(self.identity_block(filters, conv_shortcut=True))
        # Remaining blocks: identity shortcut.
        for _ in range(blocks - 1):
            stack.add(self.identity_block(filters, conv_shortcut=False))
        self.res_block = stack

    def call(self, inputs, training=False, **kwargs):
        return self.res_block(inputs, training=training)
class ResNet31V1(Layer):
    """ResNet31 backbone (downsampling of 4x8).

    https://arxiv.org/pdf/1512.03385.pdf

    Parameters
    ----------
    blocks_filters
        Filter count for each of the four residual stages.
    blocks_repetition
        Number of identity blocks in each residual stage.
    num_classes
        Kept for interface compatibility; currently unused (no head is built).
    """

    def __init__(self, blocks_filters, blocks_repetition,
                 num_classes=None):
        super(ResNet31V1, self).__init__()
        self.conv1 = Conv2D(filters=64, kernel_size=3, strides=1, padding='same')
        self.conv2 = Conv2D(filters=128, kernel_size=3, strides=1, padding='same')
        self.bn1 = BatchNormalization()
        self.bn2 = BatchNormalization()
        self.pool1 = MaxPool2D(pool_size=2, strides=2, padding='valid')
        # BUG FIX: was `()` (a tuple) — tuples have no .append(), so the
        # original raised AttributeError during construction. Use a list.
        self.resnet_layers = []
        for f, b in zip(blocks_filters, blocks_repetition):
            self.resnet_layers.append(
                ResnetBlock31V1(filters=f, blocks=b))
        self.n_layers = len(self.resnet_layers)
        self.conv3 = Conv2D(filters=256, kernel_size=3, strides=1, padding='same')
        self.bn3 = BatchNormalization()
        self.pool2 = MaxPool2D(pool_size=2, strides=2, padding='valid')
        self.conv4 = Conv2D(filters=256, kernel_size=3, strides=1, padding='same')
        self.bn4 = BatchNormalization()
        # Asymmetric pooling: halve height only (keeps width for sequence models).
        self.pool3 = MaxPool2D(pool_size=(2, 1), strides=(2, 1), padding='valid')
        self.conv5 = Conv2D(filters=512, kernel_size=3, strides=1, padding='same')
        self.bn5 = BatchNormalization()
        self.conv6 = Conv2D(filters=512, kernel_size=3, strides=1, padding='same')
        self.bn6 = BatchNormalization()

    def call(self, inputs, training=False, **kwargs):
        """Run the backbone on a 4-D NHWC tensor; returns the final feature map."""
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = tf.nn.relu(x)
        x = self.pool1(x)
        # BUG FIX: was `self.resnet_layers(0)` — calling the container instead
        # of indexing it, which raises TypeError. Same fix for stages 1-3 below.
        x = self.resnet_layers[0](x, training=training)
        x = self.conv3(x)
        x = self.bn3(x, training=training)
        x = tf.nn.relu(x)
        x = self.pool2(x)
        x = self.resnet_layers[1](x, training=training)
        x = self.conv4(x)
        x = self.bn4(x, training=training)
        x = tf.nn.relu(x)
        x = self.pool3(x)
        x = self.resnet_layers[2](x, training=training)
        x = self.conv5(x)
        x = self.bn5(x, training=training)
        x = tf.nn.relu(x)
        x = self.resnet_layers[3](x, training=training)
        x = self.conv6(x)
        x = self.bn6(x, training=training)
        x = tf.nn.relu(x)
        return x
def resnet_v1_31(num_classes=None):
    """Factory for the standard ResNet31 configuration (4 stages)."""
    # Stage widths and repetitions follow the ResNet31 recipe.
    return ResNet31V1(blocks_filters=(256, 256, 512, 512),
                      blocks_repetition=(1, 2, 5, 3),
                      num_classes=num_classes)
```

Thank you in advance for your help !