// C++ Parsing bool Detector::parse_custom_yolo(const Blob::Ptr& blob, float cof_threshold, const unsigned long resized_im_h, const unsigned long resized_im_w, const unsigned long original_im_h, const unsigned long original_im_w, std::vector& objects) { LockedMemory blobMapped = as(blob)->rmap(); const float* output_blob = blobMapped.as(); const int num = 2; const int coords = 4; const int classes = 35; std::vector anchors = { 3.638, 5.409, 3.281, 4.764 }; const int out_blob_h = static_cast(blob->getTensorDesc().getDims()[2]); const int out_blob_w = static_cast(blob->getTensorDesc().getDims()[3]); auto side_h = out_blob_h; auto side_w = out_blob_w; auto side_square = side_h * side_w; for (int i = 0; i < side_square; ++i) { int row = i / side_h; int col = i % side_w; for (int n = 0; n < num; ++n) { int obj_index = EntryIndex(side_h, side_w, coords, classes, n * side_h * side_w + i, coords); int box_index = EntryIndex(side_h, side_w, coords, classes, n * side_h * side_w + i, 0); float scale = sigmoid(output_blob[obj_index]); if (scale < cof_threshold) continue; double x = (col + sigmoid(output_blob[box_index + 0 * side_square])) * resized_im_w / side_w; double y = (row + sigmoid(output_blob[box_index + 1 * side_square])) * resized_im_h / side_w; double height = std::exp(output_blob[box_index + 3 * side_square]) * anchors[2 * n + 1] * resized_im_h / side_h; double width = std::exp(output_blob[box_index + 2 * side_square]) * anchors[2 * n] * resized_im_w / side_w; x = x - width / 2; y = y - height / 2; width = x + width; height = y + height; // Scale coordinates x = x * original_im_w / resized_im_w; y = y * original_im_h / resized_im_h; width = width * original_im_w / resized_im_w; height = height * original_im_h / resized_im_h; for (int j = 0; j < classes; ++j) { int class_index = EntryIndex(side_h, side_w, coords, classes, n * side_square + i, coords + 1 + j); float prob = scale * sigmoid(output_blob[class_index]); if (prob < cof_threshold) continue; DetectionObject obj(x, y, height, width, j, prob, 1, 1); objects.push_back(obj); } } } if (objects.size() == 0) return false; else return true; } // Python Parsing def parse_yolo_region(predictions, resized_image_shape, original_im_shape, params, threshold, is_proportional): # ------------------------------------------ Validating output parameters ------------------------------------------ _, _, out_blob_h, out_blob_w = predictions.shape # assert out_blob_w == out_blob_h, "Invalid size of output blob. It sould be in NCHW layout and height should " \ # "be equal to width. Current height = {}, current width = {}" \ # "".format(out_blob_h, out_blob_w) # ------------------------------------------ Extracting layer parameters ------------------------------------------- orig_im_h, orig_im_w = original_im_shape resized_image_h, resized_image_w = resized_image_shape objects = list() size_normalizer = (resized_image_w, resized_image_h) if params.isYoloV3 else (params.side[0], params.side[1]) bbox_size = params.coords + 1 + params.classes # ------------------------------------------- Parsing YOLO Region output ------------------------------------------- def sigmoid(x): return 1. / (1. + np.exp(-x)) for row, col, n in np.ndindex(params.side[0], params.side[1], params.num): # Getting raw values for each detection bounding box bbox = predictions[0, n * bbox_size:(n + 1) * bbox_size, row, col] # x, y, width, height, object_probability = bbox[:5] x, y = sigmoid(bbox[:2]) width, height = bbox[2:4] object_probability = sigmoid(bbox[4]) class_probabilities = sigmoid(bbox[5:]) if object_probability < threshold: continue # Process raw value x = (col + x) / params.side[1] y = (row + y) / params.side[0] # Value for exp is very big number in some cases so following construction is using here try: width = exp(width) height = exp(height) except OverflowError: continue # Depends on topology we need to normalize sizes by feature maps (up to YOLOv3) or by input shape (YOLOv3) width = width * params.anchors[2 * n] / size_normalizer[1] height = height * params.anchors[2 * n + 1] / size_normalizer[0] class_id = np.argmax(class_probabilities) confidence = class_probabilities[class_id] * object_probability if confidence < threshold: continue objects.append(scale_bbox(x=x, y=y, height=height, width=width, class_id=class_id, confidence=confidence, im_h=orig_im_h, im_w=orig_im_w, is_proportional=is_proportional)) return objects