BERT
A method of pretraining language representations that obtains state-of-the-art results on a wide range of NLP tasks.
MobileBERT
- < 100 ms latency (4.4x faster)
- < 100 MB size (77% reduction)
- same accuracy
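As a rough usage sketch (not from these notes): a MobileBERT question-answering model can be driven from the TF Lite Task Library's BertQuestionAnswerer. The class, the model filename, and the sample passage below are assumptions for illustration, not part of the original slides.
// Assumption: org.tensorflow.lite.task.text.qa.BertQuestionAnswerer with a MobileBERT
// SQuAD-style model bundled in assets as "mobilebert_qa.tflite" (placeholder name).
val passage = "TensorFlow Lite was released in 2017 as TensorFlow's solution for on-device inference."
val answerer = BertQuestionAnswerer.createFromFile(context, "mobilebert_qa.tflite")
// Returns candidate answer spans for the question, best match first.
val answers = answerer.answer(passage, "When was TensorFlow Lite released?")
val bestAnswer = answers.firstOrNull()?.text  // e.g. "2017"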
TF Lite Support Library
- Android
- iOS
// 1. Load your model
MyImageClassifier classifier = new MyImageClassifier(activity);
MyImageClassifier.Inputs inputs = classifier.createInputs();
// 2. Transform your data.
inputs.loadImage(rgbFrameBitmap);
// 3. Run inference.
MyImageClassifier.Outputs outputs = classifier.run(inputs);
// 4. Use the resulting output.
Map<String, Float> labeledProbabilities = outputs.getOutput();
Convert custom model
import tensorflow as tf

model = build_your_model()  # placeholder for your own Keras model
tf.keras.experimental.export_saved_model(model, saved_model_dir)
# Convert Keras model to TensorFlow Lite model.
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
# Use the new converter
# * Better debuggability, source file location identification, control flow support
converter.experimental_new_converter = True
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS,
                                       tf.lite.OpsSet.SELECT_TF_OPS]
tflite_model = converter.convert()
open("converted_model.tflite", "wb").write(tflite_model)
TF Lite interpreter
private fun initializeInterpreter() {
    val model = loadModelFile(context.assets)
    val tfliteOptions = Interpreter.Options()
    val delegate = GpuDelegate()
    // Alternative: val delegate = NnApiDelegate()
    tfliteOptions.addDelegate(delegate)
    this.interpreter = Interpreter(model, tfliteOptions)
}
private fun classify(bitmap: Bitmap): String {
    val resizedImage = Bitmap.createScaledBitmap(bitmap, ...)
    val inputByteBuffer = convertBitmapToByteBuffer(resizedImage)
    val output = Array(1) { FloatArray(OUTPUT_CLASSES_COUNT) }
    this.interpreter?.run(inputByteBuffer, output)
    // Return the index of the highest-scoring class (label lookup omitted on the slide).
    val topClass = output[0].indices.maxByOrNull { output[0][it] } ?: -1
    return topClass.toString()
}
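The helper convertBitmapToByteBuffer is not shown in the notes; a minimal sketch, assuming a float32 input tensor of INPUT_SIZE x INPUT_SIZE RGB pixels normalized to [0, 1] (both assumptions about the model):
private fun convertBitmapToByteBuffer(bitmap: Bitmap): ByteBuffer {
    // 4 bytes per float, 3 channels (RGB); INPUT_SIZE is an assumed model constant.
    val buffer = ByteBuffer.allocateDirect(4 * INPUT_SIZE * INPUT_SIZE * 3)
    buffer.order(ByteOrder.nativeOrder())
    val pixels = IntArray(INPUT_SIZE * INPUT_SIZE)
    bitmap.getPixels(pixels, 0, INPUT_SIZE, 0, 0, INPUT_SIZE, INPUT_SIZE)
    for (pixel in pixels) {
        // Extract R, G, B channels and scale to [0, 1].
        buffer.putFloat(((pixel shr 16) and 0xFF) / 255.0f)
        buffer.putFloat(((pixel shr 8) and 0xFF) / 255.0f)
        buffer.putFloat((pixel and 0xFF) / 255.0f)
    }
    buffer.rewind()
    return buffer
}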
Per-op Profiling
- Identify performance bottlenecks
bazel build -c opt \
  --config=android_arm64 --cxxopt='--std=c++11' \
  --copt=-DTFLITE_PROFILING_ENABLED \
  //tensorflow/lite/tools/benchmark:benchmark_model
adb push .../benchmark_model /data/local/tmp
adb shell taskset f0 /data/local/tmp/benchmark_model