Intel® Distribution of OpenVINO™ Toolkit
Community assistance for the Intel® Distribution of OpenVINO™ toolkit, OpenCV, and all aspects of computer vision on Intel® platforms.
6404 Discussions

Mobilenet SSD C++ implementation detection wrong on NCSDK v2

idata
Employee
776 Views

Hi guys, I am facing issues trying to implement the live object detector sample provided with ncappzoo v1 in C++ for NCSDK v2. Could you please look into my code or share with me a working code? I have tried row major, col major, changing tensor datatype, rgb and bgr conversion and everything I could think of. I am new to this and would be grateful if you could help me out. It always returns one/two detections for tv monitor and train and nothing else.

 

//the problem is somewhere here bool preprocess_image(const cv::Mat& src_image_mat, cv::Mat& preprocessed_image_mat) { // find ratio of to adjust width and height by to make them fit in network image width and height double width_ratio = (double)NETWORK_IMAGE_WIDTH / (double)src_image_mat.cols; double height_ratio = (double)NETWORK_IMAGE_HEIGHT / (double)src_image_mat.rows; // the largest ratio is the one to use for scaling both height and width. double largest_ratio = (width_ratio > height_ratio) ? width_ratio : height_ratio; cv::resize(src_image_mat, preprocessed_image_mat, cv::Size(), largest_ratio, largest_ratio, CV_INTER_AREA); int mid_row = preprocessed_image_mat.rows / 2.0; int mid_col = preprocessed_image_mat.cols / 2.0; int x_start = mid_col - (NETWORK_IMAGE_WIDTH / 2); int y_start = mid_row - (NETWORK_IMAGE_HEIGHT / 2); cv::Rect roi(x_start, y_start, NETWORK_IMAGE_WIDTH, NETWORK_IMAGE_HEIGHT); preprocessed_image_mat = preprocessed_image_mat(roi); //RGB to BRG //cvtColor(preprocessed_image_mat, preprocessed_image_mat, COLOR_RGB2BGR); return true; } std::shared_ptr<list<networkResults>> getInferenceResults(cv::Mat inputMat, struct ncGraphHandle_t* graphHandle, struct ncFifoHandle_t* fifoIn, struct ncFifoHandle_t* fifoOut) { cv::Mat preprocessed_image_mat; preprocess_image(inputMat, preprocessed_image_mat); if (preprocessed_image_mat.rows != NETWORK_IMAGE_HEIGHT || preprocessed_image_mat.cols != NETWORK_IMAGE_WIDTH) { cout << "Error - preprocessed image is unexpected size!" 
<< endl; return 0; } float_t tensor32[NETWORK_IMAGE_WIDTH * NETWORK_IMAGE_HEIGHT * 3]; uint8_t* image_data_ptr = (uint8_t*)preprocessed_image_mat.data; int chan = preprocessed_image_mat.channels(); int tensor_index = 0; for (int col = 0; col < preprocessed_image_mat.cols; col++) { for (int row = 0; row < preprocessed_image_mat.rows; row++) { int pixel_start_index = col * (preprocessed_image_mat.rows + 0) * chan + row * chan; // TODO: don't hard code // assuming the image is in BGR format here uint8_t blue = image_data_ptr[pixel_start_index + 0]; uint8_t green = image_data_ptr[pixel_start_index + 1]; uint8_t red = image_data_ptr[pixel_start_index + 2]; tensor32[tensor_index++] = (((float_t)blue - networkMean) * networkStd); tensor32[tensor_index++] = (((float_t)green - networkMean) * networkStd); tensor32[tensor_index++] = (((float_t)red - networkMean) * networkStd); } } // queue for inference unsigned int inputTensorLength = NETWORK_IMAGE_HEIGHT * NETWORK_IMAGE_WIDTH * 3 * sizeof(float_t); retCode = ncGraphQueueInferenceWithFifoElem(graphHandle, fifoIn, fifoOut, tensor32, &inputTensorLength, 0); if (retCode != NC_OK) { cout << "Error[" << retCode << "] - could not queue inference." << endl; return 0; } // get the size of the result unsigned int res_length; unsigned int option_length = sizeof(res_length); retCode = ncFifoGetOption(fifoOut, NC_RO_FIFO_ELEMENT_DATA_SIZE, &res_length, &option_length); if (retCode != NC_OK) { cout << "Error[" << retCode << "] - could not get output result size." << endl; return 0; } float_t result_buf[res_length]; retCode = ncFifoReadElem(fifoOut, result_buf, &res_length, NULL); if (retCode != NC_OK) { cout << "Error[" << retCode << "] - could not get output result." 
<< endl; return 0; } list<networkResult> *objectInferenceResults = new list<networkResult>(); float number = *result_buf; for (int n = 0; n < number; ++n) { float* pointer = result_buf + (7 + n * 7); if (pointer[2] > CONFIDENCE_THRESHOLD) { networkResult r; r.class_ID = pointer[1]; r.confidence = pointer[2] * 100; r.x1 = pointer[3] * NETWORK_IMAGE_WIDTH; r.y1 = pointer[4] * NETWORK_IMAGE_HEIGHT; r.x2 = pointer[5] * NETWORK_IMAGE_WIDTH; r.y2 = pointer[6] * NETWORK_IMAGE_HEIGHT; objectInferenceResults->push_back(r); } } return std::shared_ptr<list<networkResult>>(objectInferenceResults); } int main(int argc, char** argv) { // Camera and image frames VideoCapture capture; Mat imgIn; // Key to escape from main loop and close program const int breakKey = 27; // esc == 27 int key; // Struct that will hold inference results std::shared_ptr<list<networkResult>>(Result); // Set up the camera capture.open(CAM_SOURCE); capture.set(CV_CAP_PROP_FRAME_WIDTH, WINDOW_WIDTH); capture.set(CV_CAP_PROP_FRAME_HEIGHT, WINDOW_HEIGHT); // Set up the display window namedWindow(WINDOW_NAME, WINDOW_NORMAL); resizeWindow(WINDOW_NAME, WINDOW_WIDTH, WINDOW_HEIGHT); setWindowProperty(WINDOW_NAME, CV_WND_PROP_ASPECTRATIO, CV_WINDOW_KEEPRATIO); moveWindow(WINDOW_NAME, 0, 0); Point winTextOrigin(0, 20); // Initialize the NCS device(s) and network graphs and FIFO queues initNCS(); unsigned int frame_count = 0; //Initialize Mobilenet SSD graph with IO FIFOs initSSD(); // main loop while (true) { // If the user presses the break key exit the loop key = waitKey(1); if ((key & 0xFF) == breakKey) { break; } // Get a frame from the camera capture >> imgIn; if (frame_count++ >= SKIP_AFTER) { capture >> imgIn; frame_count = 0; } // Flip the image horizontally //flip(imgIn, imgIn, 1); //Inference Result = getInferenceResults(imgIn, ssd_graph_handle, ssd_fifo_in, ssd_fifo_out); // Draw labels and rectangles on the image putText(imgIn, "Press ESC to exit", winTextOrigin, FONT, 2, GREEN, 2); for 
(list<networkResult>::iterator it = Result->begin(); it != Result->end(); it++) { // Draw a rectangle around the detected face rectangle(imgIn, Point(it->x1, it->y1), Point(it->x2, it->y2), RED, 1, 8, 0); // print the age and gender text to the window putText(imgIn, LABELS[it->class_ID], Point(it->x2, it->y2), FONT, FONT_SIZE, BLACK, 3); } // Show the image in the window imshow(WINDOW_NAME, imgIn); } // end main while loop //Close device and deallocate graph closeNCS(); return 0; }
0 Kudos
0 Replies
Reply