Published 2024-01-14 (author: not given) — scraped article; the code below is a paste of the OpenCV dnn `object_detection.cpp` sample, the dnn samples CMakeLists.txt, and the BigVision/LearnOpenCV YOLOv3 sample.

TEMPLATE = appCONFIG += console c++11CONFIG -= app_bundleCONFIG -= qtINCLUDEPATH += /usr/local/include usr/local/include/opencv4 usr/local/include/opencv4/opencv2LIBS += /usr/local/lib/libopencv_ /usr/local/lib/libopencv_ /usr/local/lib/libopencv_ /usr/local/lib/libopencv_ /usr/local/lib/libopencv_ /usr/local/lib/libopencv_ /usr/local/lib/libopencv_ /usr/local/lib/libopencv_ /usr/local/lib/libopencv_ /usr/local/lib/libopencv_ /usr/local/lib/libopencv_ /usr/local/lib/libopencv_ /usr/local/lib/libopencv_ /usr/local/lib/libopencv_CES += DERS += #include #include #include #include #include #include ""std::string keys = "{ help h | | Print help message. }" "{ @alias | | An alias name of model to extract preprocessing parameters from file. }" "{ zoo | | An optional path to file with preprocessing parameters }" "{ device | 0 | camera device number. }" "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }" "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }" "{ classes | | Optional path to a text file with names of classes to label detected objects. }" "{ thr | .5 | Confidence threshold. }" "{ nms | .4 | Non-maximum suppression threshold. }" "{ backend | 0 | Choose one of computation backends: " "0: automatically (by default), " "1: Halide language (/), " "2: Intel's Deep Learning Inference Engine (/openvino-toolkit), " "3: OpenCV implementation }" "{ target | 0 | Choose one of target computation devices: " "0: CPU target (by default), " "1: OpenCL, " "2: OpenCL fp16 (half-float precision), " "3: VPU }";using namespace cv;using namespace dnn;float confThreshold, nmsThreshold;

std::vector classes;void postprocess(Mat& frame, const std::vector& out, Net& net);void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);void callback(int pos, void* userdata);std::vector getOutputsNames(const Net& net);int main(int argc, char** argv){ CommandLineParser parser(argc, argv, keys); const std::string modelName = ("@alias"); const std::string zooFile = ("zoo"); keys += genPreprocArguments(modelName, zooFile); parser = CommandLineParser(argc, argv, keys); ("Use this script to run object detection deep learning networks using OpenCV."); if (argc == 1 || ("help")) { essage(); return 0; } confThreshold = ("thr"); nmsThreshold = ("nms"); float scale = ("scale"); Scalar mean = ("mean"); bool swapRB = ("rgb"); int inpWidth = ("width"); int inpHeight = ("height"); CV_Assert(("model")); std::string modelPath = findFile(("model")); std::string configPath = findFile(("config")); // Open file with classes names. if (("classes")) { std::string file = ("classes"); std::ifstream ifs(file.c_str()); if (!_open()) CV_Error(Error::StsError, "File " + file + " not found"); std::string line; while (std::getline(ifs, line)) { _back(line); } } // Load a model. Net net = readNet(modelPath, configPath, ("framework")); ferableBackend(("backend")); ferableTarget(("target")); std::vector outNames = onnectedOutLayersNames(); // Create a window static const std::string kWinName = "Deep learning object detection in OpenCV"; namedWindow(kWinName, WINDOW_NORMAL); int initialConf = (int)(confThreshold * 100); createTrackbar("Confidence threshold, %", kWinName, &initialConf, 99, callback); // Open a video file or an image file or a camera stream.

VideoCapture cap; if (("input")) (("input")); else (("device")); // Process frames. Mat frame, blob; while (waitKey(1) < 0) { cap >> frame; if (()) { waitKey(); break; } // Create a 4D blob from a frame. Size inpSize(inpWidth > 0 ? inpWidth : , inpHeight > 0 ? inpHeight : ); blobFromImage(frame, blob, scale, inpSize, mean, swapRB, false); // Run a model. ut(blob); if (er(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN { resize(frame, frame, inpSize); Mat imInfo = (Mat_(1, 3) << , , 1.6f); ut(imInfo, "im_info"); } std::vector outs; d(outs, outNames); postprocess(frame, outs, net); // Put efficiency information. std::vector layersTimes; double freq = getTickFrequency() / 1000; double t = fProfile(layersTimes) / freq; std::string label = format("Inference time: %.2f ms", t); putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); imshow(kWinName, frame); } return 0;}void postprocess(Mat& frame, const std::vector& outs, Net& net){ static std::vector outLayers = onnectedOutLayers(); static std::string outLayerType = er(outLayers[0])->type; std::vector classIds; std::vector confidences; std::vector boxes; if (outLayerType == "DetectionOutput") { // Network produces output blob with a shape 1x1xNx7 where N is a number of // detections and an every detection is a vector of values // [batchId, classId, confidence, left, top, right, bottom] CV_Assert(() > 0); for (size_t k = 0; k < (); k++) { float* data = (float*)outs[k].data; for (size_t i = 0; i < outs[k].total(); i += 7)

{ float confidence = data[i + 2]; if (confidence > confThreshold) { int left = (int)data[i + 3]; int top = (int)data[i + 4]; int right = (int)data[i + 5]; int bottom = (int)data[i + 6]; int width = right - left + 1; int height = bottom - top + 1; if (width * height <= 1) { left = (int)(data[i + 3] * ); top = (int)(data[i + 4] * ); right = (int)(data[i + 5] * ); bottom = (int)(data[i + 6] * ); width = right - left + 1; height = bottom - top + 1; } _back((int)(data[i + 1]) - 1); // Skip 0th background class id. _back(Rect(left, top, width, height)); _back(confidence); } } } } else if (outLayerType == "Region") { for (size_t i = 0; i < (); ++i) { // Network produces output blob with a shape NxC where N is a number of // detected objects and C is a number of classes + 4 where the first 4 // numbers are [center_x, center_y, width, height] float* data = (float*)outs[i].data; for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols) { Mat scores = outs[i].row(j).colRange(5, outs[i].cols); Point classIdPoint; double confidence; minMaxLoc(scores, 0, &confidence, 0, &classIdPoint); if (confidence > confThreshold) { int centerX = (int)(data[0] * ); int centerY = (int)(data[1] * ); int width = (int)(data[2] * ); int height = (int)(data[3] * ); int left = centerX - width / 2; int top = centerY - height / 2; _back(classIdPoint.x); _back((float)confidence); _back(Rect(left, top, width, height)); } } } } else CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType); std::vector indices; NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices); for (size_t i = 0; i < (); ++i) { int idx = indices[i]; Rect box = boxes[idx];

drawPred(classIds[idx], confidences[idx], box.x, box.y, box.x + , box.y + , frame); }}void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame){ rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0)); std::string label = format("%.2f", conf); if (!()) { CV_Assert(classId < (int)()); label = classes[classId] + ": " + label; } int baseLine; Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); top = max(top, ); rectangle(frame, Point(left, top - ), Point(left + , top + baseLine), Scalar::all(255), FILLED); putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar());}void callback(int pos, void*){ confThreshold = pos * 0.01f;}的内容ocv_install_example_src(dnn *.cpp *.hpp )set(OPENCV_DNN_SAMPLES_REQUIRED_DEPS opencv_core opencv_imgproc opencv_dnn opencv_imgcodecs opencv_videoio opencv_highgui)ocv_check_dependencies(${OPENCV_DNN_SAMPLES_REQUIRED_DEPS})if(NOT BUILD_EXAMPLES OR NOT OCV_DEPENDENCIES_FOUND) return()endif()project(dnn_samples)ocv_include_modules_recurse(${OPENCV_DNN_SAMPLES_REQUIRED_DEPS})file(GLOB_RECURSE dnn_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)foreach(sample_filename ${dnn_samples}) ocv_define_sample(tgt ${sample_filename} dnn) ocv_target_link_libraries(${tgt} ${OPENCV_LINKER_LIBS} ${OPENCV_DNN_SAMPLES_REQUIRED_DEPS})endforeach()// This code is written at BigVision LLC. It is based on the OpenCV project. It is subject to the license terms in the LICENSE file found in this distribution and at /// Usage example: ./object_detection_ --video=4// ./object_detection_ --image=#include #include #include #include

#include #include const char* keys ="{help h usage ? | | Usage examples: ntt./object_detection_ --image= ntt./object_detection_ --video=run_4}""{image i || input image }""{video v || input video }";using namespace cv;using namespace dnn;using namespace std;// Initialize the parametersfloat confThreshold = 0.5; // Confidence thresholdfloat nmsThreshold = 0.4; // Non-maximum suppression thresholdint inpWidth = 416; // Width of network's input imageint inpHeight = 416; // Height of network's input imagevector classes;// Remove the bounding boxes with low confidence using non-maxima suppressionvoid postprocess(Mat& frame, const vector& out);// Draw the predicted bounding boxvoid drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);// Get the names of the output layersvector getOutputsNames(const Net& net);int main(int argc, char** argv){ CommandLineParser parser(argc, argv, keys); ("Use this script to run object detection using YOLO3 in OpenCV."); if (("help")) { essage(); return 0; } // Load names of classes string classesFile = ""; ifstream ifs(classesFile.c_str()); string line; while (getline(ifs, line)) _back(line);

// Give the configuration and weight files for the model String modelConfiguration = ""; String modelWeights = "s"; // Load the network Net net = readNetFromDarknet(modelConfiguration, modelWeights); ferableBackend(DNN_BACKEND_OPENCV); ferableTarget(DNN_TARGET_CPU);

// Open a video file or an image file or a camera stream. string str, outputFile; VideoCapture cap; VideoWriter video; Mat frame, blob;

try {

outputFile = "yolo_out_"; if (("image")) { // Open the image file str = ("image");

ifstream ifile(str); if (!ifile) throw("error"); (str); e(()-4, (), "_yolo_out_"); outputFile = str; } else if (("video")) { // Open the video file str = ("video"); ifstream ifile(str); if (!ifile) throw("error"); (str); e(()-4, (), "_yolo_out_"); outputFile = str; } // Open the webcaom else (("device"));

} catch(...) { cout << "Could not open the input image/video stream" << endl; return 0; }

// Get the video writer initialized to save the output video if (!("image")) { (outputFile, VideoWriter::fourcc('M','J','P','G'), 28, Size((CAP_PROP_FRAME_WIDTH), (CAP_PROP_FRAME_HEIGHT))); }

// Create a window static const string kWinName = "Deep learning object detection in OpenCV"; namedWindow(kWinName, WINDOW_NORMAL); // Process frames. while (waitKey(1) < 0) { // get frame from the video cap >> frame; // Stop the program if reached end of video if (()) { cout << "Done processing " << endl; cout << "Output file is stored as " << outputFile << endl; waitKey(3000); break; } // Create a 4D blob from a frame. blobFromImage(frame, blob, 1/255.0, cvSize(inpWidth, inpHeight), Scalar(0,0,0), true, false);

//Sets the input to the network ut(blob);

// Runs the forward pass to get output of the output layers vector outs; d(outs, getOutputsNames(net));

// Remove the bounding boxes with low confidence postprocess(frame, outs);

// Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes) vector layersTimes; double freq = getTickFrequency() / 1000; double t = fProfile(layersTimes) / freq; string label = format("Inference time for a frame : %.2f ms", t); putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 255));

putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 255));

// Write the frame with the detection boxes Mat detectedFrame; tTo(detectedFrame, CV_8U); if (("image")) imwrite(outputFile, detectedFrame); else (detectedFrame);

imshow(kWinName, frame);

}

e(); if (!("image")) e(); return 0;}// Remove the bounding boxes with low confidence using non-maxima suppressionvoid postprocess(Mat& frame, const vector& outs){ vector classIds; vector confidences; vector boxes;

for (size_t i = 0; i < (); ++i) { // Scan through all the bounding boxes output from the network and keep only the // ones with high confidence scores. Assign the box's class label as the class // with the highest score for the box. float* data = (float*)outs[i].data; for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols) { Mat scores = outs[i].row(j).colRange(5, outs[i].cols); Point classIdPoint; double confidence; // Get the value and location of the maximum score minMaxLoc(scores, 0, &confidence, 0, &classIdPoint); if (confidence > confThreshold) { int centerX = (int)(data[0] * ); int centerY = (int)(data[1] * ); int width = (int)(data[2] * ); int height = (int)(data[3] * ); int left = centerX - width / 2; int top = centerY - height / 2;

_back(classIdPoint.x); _back((float)confidence); _back(Rect(left, top, width, height)); } } }

// Perform non maximum suppression to eliminate redundant overlapping boxes with // lower confidences vector indices; NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices); for (size_t i = 0; i < (); ++i) { int idx = indices[i]; Rect box = boxes[idx]; drawPred(classIds[idx], confidences[idx], box.x, box.y, box.x + , box.y + , frame); }}

}// Draw the predicted bounding boxvoid drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame){ //Draw a rectangle displaying the bounding box rectangle(frame, Point(left, top), Point(right, bottom), Scalar(255, 178, 50), 3);

//Get the label for the class name and its confidence string label = format("%.2f", conf); if (!()) { CV_Assert(classId < (int)()); label = classes[classId] + ":" + label; }

//Display the label at the top of the bounding box int baseLine; Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); top = max(top, ); rectangle(frame, Point(left, top - round(1.5*)), Point(left + round(1.5*), top + baseLine), Scalar(255, 255, 255), FILLED); putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0,0,0),1);}// Get the names of the output layersvector getOutputsNames(const Net& net){ static vector names; if (()) { //Get the indices of the output layers, i.e. the layers with unconnected outputs vector outLayers = onnectedOutLayers();

//get the names of all the layers in the network vector layersNames = erNames();

// Get the names of the output layers in names (()); for (size_t i = 0; i < (); ++i) names[i] = layersNames[outLayers[i] - 1]; } return names;}