opencv Mat与ffmpeg Frame数据互转

-- TOC --

用opencv采集的摄像头数据,是Mat对象,在视频编码之前,需要转换成ffmpeg对应的frame数据。反过来,decode出来的frame,如果要用opencv的imshow显示出来,需要将frame数据再转成Mat对象。

下面这段代码的逻辑是这样的:先读取默认摄像头的数据,读取一个固定的帧数,每一帧数据都做Mat --> frame的转换,encode后写入out.bin这个文件;然后decode out.bin,得到一个个frame,每一个frame数据都做frame --> Mat转换,最后由opencv imshow显示出来。

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <iostream>
#include <fstream>
using namespace std;

#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#pragma comment(lib, "opencv_world455d.lib")

extern "C" {
    #include <libavutil/opt.h>
    #include <libavutil/imgutils.h>
    #include <libavcodec/avcodec.h>
    #include <libswscale/swscale.h>
}

#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "swscale.lib")


string fname{ "out.bin" };
#define INBUF_SIZE  4096


/* Send one frame to the encoder and append every packet it produces to
 * the output file `fname`.
 *
 * enc_ctx: opened encoder context
 * frame:   frame to encode, or NULL to start flushing the encoder
 * pkt:     reusable packet for receiving encoder output
 *
 * Exits the process on any encoder or file error. */
static void encode(AVCodecContext* enc_ctx, AVFrame* frame, AVPacket* pkt) {
    int ret;

    /* send the frame to the encoder (frame == NULL begins the flush) */
    ret = avcodec_send_frame(enc_ctx, frame);
    if (ret < 0) {
        cerr << "error sending a frame to encoder" << endl;
        exit(1);
    }

    /* open the output once per call, not once per packet */
    ofstream fout{ fname, ios::binary | ios::app };
    if (!fout.is_open()) {
        cerr << "open out.bin in encode() error" << endl;
        exit(1);
    }

    while (ret >= 0) {
        // drain all packets the encoder has ready
        ret = avcodec_receive_packet(enc_ctx, pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            return;  // needs more input, or flush complete
        else if (ret < 0) {
            cerr << "error during encoding" << endl;
            exit(1);
        }

        // append the raw H.264 packet to the file
        fout.write((const char*)pkt->data, pkt->size);

        // cast pts for a portable %lld (AVPacket::pts is int64_t)
        printf("packet %3lld (size=%5d)  flags:%d\n",
               (long long)pkt->pts, pkt->size, pkt->flags);
        av_packet_unref(pkt);
    }
}


/* Capture 100 frames from the default camera, convert each cv::Mat
 * (BGR24) to an AVFrame (YUV420P), encode with libx264 and append the
 * packets to `fname`. Returns 0 on success; exits on fatal errors. */
int encode_main(void) {
    const AVCodec* codec = nullptr;
    AVCodecContext* enc_context = nullptr;
    int ret;

    // truncate output file so encode() can append from a clean state
    ofstream fout{ fname };
    if (!fout.is_open()) {
        cerr << "truncate output file error" << endl;
        exit(1);
    }
    fout.close();

    /* find the encoder */
    codec = avcodec_find_encoder_by_name("libx264");
    if (!codec) {
        cerr << "codec not found" << endl;
        exit(1);
    }

    /* create encoder context */
    enc_context = avcodec_alloc_context3(codec);
    if (enc_context == nullptr) {
        cerr << "could not allocate video codec context" << endl;
        exit(1);
    }

    /* set parameters */
    //c->bit_rate = 400000;
    /* resolution must be a multiple of two */
    enc_context->width = 640;
    enc_context->height = 480;
    /* frames per second */
    enc_context->time_base = { 1, 30 };
    enc_context->framerate = { 30, 1 };

    /* emit one intra frame every ten frames
     * check frame pict_type before passing frame
     * to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
     * then gop_size is ignored and the output of encoder
     * will always be I frame irrespective to gop_size
     */
    enc_context->gop_size = 30;
    enc_context->pix_fmt = AV_PIX_FMT_YUV420P;
    // when flags = 0, SPS & PPS will be generated for each IDR
    enc_context->flags = 0;
    //enc_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    //enc_context->flags2 |= AV_CODEC_FLAG2_LOCAL_HEADER;
    if (codec->id == AV_CODEC_ID_H264) {
        //enc_context->max_b_frames = 0;
        av_opt_set(enc_context->priv_data, "preset", "ultrafast", 0);
    }

    /* open it */
    if ((ret = avcodec_open2(enc_context, codec, nullptr)) < 0) {
        cerr << "could not open codec: " << endl;
        //fprintf(stderr, "Could not open codec: %s\n", av_err2str(ret));
        exit(1);
    }

    cv::Mat mat;
    cv::VideoCapture cap;
    int apiID = cv::CAP_ANY;      // 0 = autodetect default API
    cap.open(0, apiID);
    if (!cap.isOpened()) {
        cerr << "ERROR! Unable to open camera\n";
        exit(1);
    }
    cout << "## width: " << cap.get(cv::CAP_PROP_FRAME_WIDTH) << endl; // 640
    cout << "## height: " << cap.get(cv::CAP_PROP_FRAME_HEIGHT) << endl;  // 480
    cout << "## fps: " << cap.get(cv::CAP_PROP_FPS) << endl;  // 0

    AVFrame* frame = av_frame_alloc();
    if (frame == nullptr) {
        cerr << "could not allocate video frame" << endl;
        exit(1);
    }
    frame->format = enc_context->pix_fmt;
    frame->width = enc_context->width;
    frame->height = enc_context->height;
    if (av_frame_get_buffer(frame, 0) < 0) {
        cerr << "could not allocate video frame data" << endl;
        exit(1);
    }
    AVPacket* pkt = av_packet_alloc();
    if (pkt == nullptr)
        exit(1);

    const int w = enc_context->width;
    const int h = enc_context->height;
    for (int i = 0; i < 100; ++i) {
        fflush(stdout);

        cap.read(mat);
        if (mat.empty()) {
            cerr << "ERROR! blank frame grabbed\n";
            break;
        }
        // guard the fixed-size copies below: a camera that does not
        // deliver 640x480 would otherwise cause a buffer overread
        if (mat.cols != w || mat.rows != h) {
            cerr << "ERROR! unexpected frame size "
                 << mat.cols << "x" << mat.rows << "\n";
            break;
        }

        // the encoder may still hold references to the frame buffers
        // from the previous avcodec_send_frame(), so this must be done
        // once per frame, not once before the loop
        if (av_frame_make_writable(frame) < 0) {
            cerr << "frame is not writable" << endl;
            exit(1);
        }

        // mat --> frame: BGR24 to planar YUV420P (I420 == YUV420P).
        // After cvtColor the Mat is a single-channel (h*3/2) x w buffer:
        // Y plane, then U, then V, each tightly packed.
        cv::cvtColor(mat, mat, cv::COLOR_BGR2YUV_I420);
        const uint8_t* src_y = mat.data;
        const uint8_t* src_u = mat.data + w * h;
        const uint8_t* src_v = mat.data + w * h * 5 / 4;
        // copy row by row: frame->linesize[i] may be padded beyond the
        // plane width, so a single memcpy per plane is not safe
        for (int r = 0; r < h; ++r)
            memcpy(frame->data[0] + r * frame->linesize[0], src_y + r * w, w);
        for (int r = 0; r < h / 2; ++r) {
            memcpy(frame->data[1] + r * frame->linesize[1], src_u + r * (w / 2), w / 2);
            memcpy(frame->data[2] + r * frame->linesize[2], src_v + r * (w / 2), w / 2);
        }
        frame->pts = i; // presentation time stamp
        encode(enc_context, frame, pkt);
    }

    /* flush the encoder */
    cout << "start flush encoder..." << endl;
    encode(enc_context, NULL, pkt);

    avcodec_free_context(&enc_context);
    av_frame_free(&frame);
    av_packet_free(&pkt);
    return 0;
}


/* Feed one packet (or NULL to flush) to the decoder, convert every
 * decoded YUV frame to BGR24 with swscale, and display it with OpenCV.
 * Exits the process on fatal errors. */
static void decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt) {
    int ret;

    // send packet to decoder (pkt == NULL begins the flush)
    ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret < 0) {
        fprintf(stderr, "Error sending a packet for decoding %d\n", ret);
        exit(1);
    }

    while (ret >= 0) {
        // receive frame
        ret = avcodec_receive_frame(dec_ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            return;
        }
        else if (ret < 0) {
            fprintf(stderr, "Error during decoding\n");
            exit(1);
        }

        printf("dec_ctx->frame %3d, key %d, type %d\n",
                        dec_ctx->frame_number,
                        frame->key_frame,
                        frame->pict_type);
        fflush(stdout);

        // frame --> mat
        AVFrame* frame_bgr24 = av_frame_alloc();
        if (frame_bgr24 == nullptr) {  // BUGFIX: original tested `frame` here
            cerr << "could not allocate frame_bgr24" << endl;
            exit(1);
        }
        frame_bgr24->format = AV_PIX_FMT_BGR24;
        frame_bgr24->width = frame->width;
        frame_bgr24->height = frame->height;
        if (av_frame_get_buffer(frame_bgr24, 0) < 0) {
            cerr << "could not allocate video frame_bgr24 data" << endl;
            exit(1);
        }
        // use the decoder's reported pixel format instead of assuming YUV420P
        struct SwsContext* frame_convert_ctx = sws_getContext(
            frame->width,
            frame->height,
            (AVPixelFormat)frame->format,
            frame_bgr24->width,
            frame_bgr24->height,
            AV_PIX_FMT_BGR24,
            SWS_BICUBIC,
            NULL, NULL, NULL);
        if (frame_convert_ctx == nullptr) {
            cerr << "could not create sws context" << endl;
            exit(1);
        }
        sws_scale(
            frame_convert_ctx,
            frame->data,
            frame->linesize,
            0,
            frame->height,
            frame_bgr24->data,
            frame_bgr24->linesize);

        // Wrap the BGR buffer without copying, honoring the row stride.
        // (The original allocated a Mat and then overwrote mat.data,
        // leaking the Mat's own buffer and ignoring linesize padding.)
        cv::Mat mat(frame_bgr24->height, frame_bgr24->width, CV_8UC3,
                    frame_bgr24->data[0], (size_t)frame_bgr24->linesize[0]);
        cv::imshow("show", mat);
        bool stop = cv::waitKey(50) >= 0;

        // av_frame_free also releases the frame's buffers;
        // plain av_free leaked them
        sws_freeContext(frame_convert_ctx);
        av_frame_free(&frame_bgr24);
        if (stop)
            break;
    }
}


/* Read the raw H.264 stream from `fname` in INBUF_SIZE chunks, split it
 * into packets with the h264 parser, decode each packet and display the
 * frames via decode(). Returns 0 on success; exits on fatal errors. */
int decode_main()
{
    const AVCodec* codec = nullptr;
    AVCodecParserContext* parser = nullptr;
    AVCodecContext* dec_context = nullptr;
    AVFrame* frame = nullptr;
    // zero-initialized so the AV_INPUT_BUFFER_PADDING_SIZE tail the
    // parser may read past the data is always zeroed
    char inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE]{};
    char* data = nullptr;
    size_t   data_size;
    int ret;
    int eof;
    AVPacket* pkt = nullptr;

    pkt = av_packet_alloc();
    if (pkt == nullptr)
        exit(1);

    /* find the video decoder */
    codec = avcodec_find_decoder(AV_CODEC_ID_H264);
    if (!codec) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }

    parser = av_parser_init(codec->id);
    if (!parser) {
        fprintf(stderr, "parser not found\n");
        exit(1);
    }

    dec_context = avcodec_alloc_context3(codec);
    if (!dec_context) {
        fprintf(stderr, "Could not allocate video codec context\n");
        exit(1);
    }

    /* open it */
    if (avcodec_open2(dec_context, codec, nullptr) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }

    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }

    cv::namedWindow("show");

    ifstream fin{ fname, ios::binary };
    if (!fin.is_open()) {
        // BUGFIX: original fell through and looped on a dead stream
        cerr << "open output file error" << endl;
        exit(1);
    }

    while (1) {
        fin.read(inbuf, INBUF_SIZE);
        if (fin.bad()) {
            cout << "read file error" << endl;
            break;
        }
        data_size = fin.gcount();
        eof = !data_size;  // empty read marks end of stream
        data = inbuf;
        // on eof, keep calling the parser with size 0 to drain it
        while (data_size > 0 || eof) {
            ret = av_parser_parse2(parser, dec_context, &pkt->data, &pkt->size,
                      (const uint8_t*)data, (int)data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
            if (ret < 0) {
                fprintf(stderr, "Error while parsing\n");
                exit(1);
            }
            data += ret;
            data_size -= ret;

            if (pkt->size)
                decode(dec_context, frame, pkt);

            if (eof)
                break;
        }
        if (eof)
            break;
    }

    /* flush the decoder */
    cout << "flush decoder..." << endl;
    decode(dec_context, frame, nullptr);
    fin.close();

    cv::destroyWindow("show");

    av_parser_close(parser);
    avcodec_free_context(&dec_context);
    av_frame_free(&frame);
    av_packet_free(&pkt);
    return 0;
}


/* Capture+encode to out.bin first, then decode+display it.
 * Propagate each stage's return code instead of discarding it
 * (both currently return 0 or exit(), so behavior is unchanged). */
int main(void) {
    const int rc = encode_main();
    if (rc != 0)
        return rc;
    return decode_main();
}

opencv读取出来的mat对象,默认是BGR24格式,转frame时,先转换color space到YUV,然后memcpy。decode出来的frame是YUV格式,通过sws_scale的方式转成BGR24,然后做了一次指针赋值。

其实,同学们,Mat对象中的数据,和frame中的数据,本质上是一样的。

测试发现,cv::cvtColor(mat, mat, cv::COLOR_BGR2YUV_I420)执行后,如果去imshow这个mat对象,画面是乱的。

本文链接:https://cs.pynote.net/ag/image/202209275/

-- EOF --

-- MORE --