
TensorFlow Object Detection API Mask R-CNN Python inference getting different results from C++ demo

zero__heisenburgzero

I trained a Mask R-CNN model on a custom dataset. I successfully converted it to IR format and was able to run it through the C++ mask_rcnn demo.

Since I wasn't able to find any sample Python code that loads a TF Object Detection API Mask R-CNN model, I tried to build the Python code by tracing the C++ demo's steps.

I was able to load the model and run it, but I'm getting different results compared to the C++ version. In fact, I'm not getting any valid results at all: all probabilities are close to zero.

Here is the code:

from __future__ import print_function
import sys
import os
from argparse import ArgumentParser, SUPPRESS
import cv2
import numpy as np
import logging as log
from openvino.inference_engine import IECore


def build_argparser():
    parser = ArgumentParser(add_help=False)
    args = parser.add_argument_group("Options")
    args.add_argument('-h', '--help', action='help', default=SUPPRESS, help='Show this help message and exit.')
    args.add_argument("-m", "--model", help="Required. Path to an .xml file with a trained model.",
        required=False, type=str)
    args.add_argument("-i", "--input", help="Required. Path to image file.",
        required=False, type=str, nargs="+")
    args.add_argument("-l", "--cpu_extension",
        help="Optional. Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.",
        type=str, default=None)
    args.add_argument("-d", "--device",
        help="Optional. Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified (CPU by default)",
        default="CPU", type=str)
    args.add_argument("--labels", help="Optional. Labels mapping file", default=None, type=str)
    args.add_argument("-nt", "--number_top", help="Optional. Number of top results", default=10, type=int)

    return parser


def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()
    log.info("Loading Inference Engine")
    ie = IECore()
    # --------------------------- 1. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
    args.model = "D:\\_Models\\mask_rcnn_dummy_test\\ov_nail_maskrcnn.xml"
    img_path = "D:\\openvino_classifier_python\\images\\mtr2.jpg"

    args.input = [img_path]
    # print(args.input)

    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = ie.read_network(model=model_xml, weights=model_bin)
    net.add_outputs("detection_output")
    # -----------------------------------------------------------------------------------------------------

    # ------------- 2. Load Plugin for inference engine and extensions library if specified --------------
    log.info("Device info:")
    versions = ie.get_versions(args.device)
    print("{}{}".format(" "*8, args.device))
    print("{}MKLDNNPlugin version ......... {}.{}".format(" "*8, versions[args.device].major, versions[args.device].minor))
    print("{}Build ........... {}".format(" "*8, versions[args.device].build_number))

    if args.cpu_extension and "CPU" in args.device:
        ie.add_extension(args.cpu_extension, "CPU")
        log.info("CPU extension loaded: {}".format(args.cpu_extension))

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
                      format(args.device, ', '.join(not_supported_layers)))
            log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                      "or --cpu_extension command line argument")
            sys.exit(1)
    # -----------------------------------------------------------------------------------------------------

    # --------------------------- 3. Read and preprocess input --------------------------------------------
    # input_blob = next(iter(net.inputs))
    input_blob = "image_tensor"
    n, c, h, w = net.inputs[input_blob].shape
    images = np.ndarray(shape=(n, c, h, w))
    images_hw = []
    for i in range(n):
        org_image = cv2.imread(args.input[i])
        # image = cv2.imread(img_path)
        image = org_image.copy()
        ih, iw = image.shape[:-1]
        images_hw.append((ih, iw))
        log.info("File was added: ")
        log.info("        {}".format(args.input[i]))
        if (ih, iw) != (h, w):
            image = cv2.resize(image, (w, h))
            log.warning("Image {} is resized from {} to {}".format(args.input[i], image.shape[:-1], (h, w)))
        image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        # image = image.astype(np.float32) / 255.0
        # print(type(image[0][0][0]))
        images[i] = image
    # -----------------------------------------------------------------------------------------------------

    # --------------------------- 4. Configure input & output ---------------------------------------------
    # --------------------------- Prepare input blobs -----------------------------------------------------
    log.info("Preparing input blobs")
    assert (len(net.inputs.keys()) == 1 or len(net.inputs.keys()) == 2), "Sample supports topologies only with 1 or 2 inputs"

    print([i for i in iter(net.inputs)])
    print([i for i in iter(net.outputs)])
    # sys.exit(0)
    # input_blob = next(iter(net.inputs))
    input_blob = "image_tensor"
    out_blob = next(iter(net.outputs))
    input_name, input_info_name = "", ""

    for input_key in net.inputs:
        if len(net.inputs[input_key].layout) == 4:
            input_name = input_key
            print(input_name, 'set batch')
            log.info("Batch size is {}".format(net.batch_size))
            net.inputs[input_key].precision = 'U8'
        elif len(net.inputs[input_key].layout) == 2:
            input_info_name = input_key
            print(input_key, "set precision")
            print(net.inputs[input_key].precision)
            # print(net.input_info())
            net.inputs[input_key].precision = 'FP32'
            if net.inputs[input_key].shape[1] != 3 and net.inputs[input_key].shape[1] != 6 or net.inputs[input_key].shape[0] != 1:
                log.error('Invalid input info. Should be 3 or 6 values length.')

    net_input =  { "image_tensor" : images, "image_info": images.shape[1:]}

    # --------------------------- Prepare output blobs ----------------------------------------------------
    # log.info('Preparing output blobs')

    # output_info = net.outputs["reshape_do_2d"]
    # print(output_info.precision)
    # print(output_info.shape)
    # print(net.outputs.keys())
    # output_name, output_info = "", net.outputs[next(iter(net.outputs.keys()))]
    # print(output_name, output_info)
    # for output_key in net.outputs:
    #     print(net.layers[output_key].type)
    #     if net.layers[output_key].type == "DetectionOutput":
    #         output_name, output_info = output_key, net.outputs[output_key]

    # if output_name == "":
    #     log.error("Can't find a DetectionOutput layer in the topology")

    # output_dims = output_info.shape
    # if len(output_dims) != 4:
    #     log.error("Incorrect output dimensions for SSD model")
    # max_proposal_count, object_size = output_dims[2], output_dims[3]

    # if object_size != 7:
    #     log.error("Output item should have 7 as a last dimension")

    # print(out_blob)
    # print(output_name)
    # print(output_info)

    

    # output_info.precision = "FP32"

    out_blob = "reshape_do_2d"
  
    # -----------------------------------------------------------------------------------------------------

    # --------------------------- Performing inference ----------------------------------------------------
    log.info("Loading model to the device")
    exec_net = ie.load_network(network=net, device_name="CPU")
    log.info("Creating infer request and starting inference")
    res = exec_net.infer(inputs=net_input)
    # -----------------------------------------------------------------------------------------------------

    # --------------------------- Read and postprocess output ---------------------------------------------
    log.info("Processing output blobs")
    # print(res)
    res = res[out_blob]
    print(res.shape)
    print(res)
    boxes, classes = {}, {}
    data = res[0][0]
    data = res

    frame_shape = images_hw[0]

    # probs = res[0, 0, :, 2]
    # for i, p in enumerate(probs):
    #     # print("P: {}".format(p))
    #     if p > 0:
    #         # num_detected += 1
    #         box = res[0, 0, i, 3:]
    #         p1 = (int(box[0] * frame_shape[0]), int(box[1] * frame_shape[1]))
    #         p2 = (int(box[2] * frame_shape[0]), int(box[3] * frame_shape[1]))
    #         org_image = cv2.rectangle(org_image, p1, p2, (0, 0, 255), 3)

    # cv2.imshow("org_image", org_image)
    # cv2.waitKey(0)


    for number, proposal in enumerate(data):
        if proposal[2] > 0:
            imid = np.int(proposal[0])
            ih, iw = images_hw[imid]
            label = np.int(proposal[1])
            confidence = proposal[2]
            xmin = np.int(iw * proposal[3])
            ymin = np.int(ih * proposal[4])
            xmax = np.int(iw * proposal[5])
            ymax = np.int(ih * proposal[6])
            print("[{},{}] element, prob = {:.6}    ({},{})-({},{}) batch id : {}"\
                .format(number, label, confidence, xmin, ymin, xmax, ymax, imid), end="")
            if proposal[2] > 0.5:
                print(" >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>WILL BE PRINTED!")
                if not imid in boxes.keys():
                    boxes[imid] = []
                boxes[imid].append([xmin, ymin, xmax, ymax])
                if not imid in classes.keys():
                    classes[imid] = []
                classes[imid].append(label)
            else:
                print()

    for imid in classes:
        tmp_image = cv2.imread(args.input[imid])
        for box in boxes[imid]:
            cv2.rectangle(tmp_image, (box[0], box[1]), (box[2], box[3]), (232, 35, 244), 2)
        cv2.imwrite("out.bmp", tmp_image)
        log.info("Image out.bmp created!")
    # -----------------------------------------------------------------------------------------------------

    log.info("Execution successful\n")
    log.info("This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool")


if __name__ == '__main__':
    sys.exit(main() or 0)

 

I basically did the following steps:

1. Load the model

2. Load the image into a 4-dimensional np array

3. Set up the input blobs. For Mask R-CNN, there are 2 keys (see the sketch further down):

net_input =  { "image_tensor" : images, "image_info": images.shape[1:]}

 

4. Set the output name: reshape_do_2d

5. Run inference

6. Parse the results.

I wasn't able to figure out what went wrong. I tried loading the images as float and dividing by 255.0, but it doesn't seem to make a difference.
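For reference, the part of the C++ demo I'm least sure I traced correctly is how the second input is filled. As far as I can tell, the demo fills the image_info blob with the network input height, width, and a scale of 1, rather than with the image tensor's shape. A minimal sketch of that, using the same names as my code above (build_net_input is just a made-up helper for illustration):

import numpy as np

def build_net_input(images, net, input_blob="image_tensor", info_blob="image_info"):
    # images: NCHW batch already resized to the network input resolution
    n, c, h, w = net.inputs[input_blob].shape
    # As far as I can tell, the C++ demo fills image_info with [height, width, scale=1]
    image_info = np.asarray([[h, w, 1]], dtype=np.float32)  # shape [1, 3]
    return {input_blob: images, info_blob: image_info}

# Hypothetical usage, in place of the net_input line in my script:
# net_input = build_net_input(images, net)
# res = exec_net.infer(inputs=net_input)

In my script I passed images.shape[1:] as image_info instead, so if the demo really expects [height, width, 1], that may be one place where my port diverges.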

 

 

Iffa_Intel
Moderator

Greetings,


You may refer to this documentation: https://docs.openvinotoolkit.org/latest/openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Mask_RCNN.html


The correct Mask R-CNN model file and a Python sample application are also included there.



Sincerely,

Iffa


Iffa_Intel
Moderator

Greetings,


Intel will no longer monitor this thread since we have provided a solution. If you need any additional information from Intel, please submit a new question. 


Sincerely,

Iffa

