The test code in this article calls the FFmpeg API from C++ and implements an H264 encoder and decoder. The encoder writes an out.bin file, and the decoder then decodes that file.
Once the VS environment is configured, the code runs directly. To summarize the FFmpeg setup in VS: the specific lib files are linked with directives of the form
#pragma comment(lib, "avcodec.lib")
rather than through project settings. Here is the code:
#include <cstdio>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using namespace std;
extern "C" {
#include <libavutil/opt.h>
#include <libavutil/imgutils.h>
#include <libavcodec/avcodec.h>
}
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "opencv_world455d.lib")
string fname{ "out.bin" };
#define INBUF_SIZE 4096
static void encode(AVCodecContext* enc_ctx, AVFrame* frame, AVPacket* pkt) {
    int ret;
    /* send the frame to the encoder */
    ret = avcodec_send_frame(enc_ctx, frame);
    if (ret < 0) {
        cerr << "error sending a frame to encoder" << endl;
        exit(1);
    }
    while (ret >= 0) {
        // receive packet
        ret = avcodec_receive_packet(enc_ctx, pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            return;
        else if (ret < 0) {
            cerr << "error during encoding" << endl;
            exit(1);
        }
        // write packet
        ofstream fout{ fname, ios::binary | ios::app };
        if (!fout.is_open()) {
            cerr << "open out.bin in encode() error" << endl;
            exit(1);
        }
        fout.write((const char*)pkt->data, pkt->size);
        fout.close();
        printf("packet %3lld (size=%5d) flags:%d\n", pkt->pts, pkt->size, pkt->flags);
        av_packet_unref(pkt);
    }
}
int encode_main(void) {
    const AVCodec* codec = nullptr;
    AVCodecContext* enc_context = nullptr;
    int ret;
    // truncate output file
    ofstream fout{ fname };
    if (!fout.is_open()) {
        cerr << "truncate output file error" << endl;
        exit(1);
    }
    fout.close();
    /* find the encoder */
    codec = avcodec_find_encoder_by_name("libx264");
    if (!codec) {
        cerr << "codec not found" << endl;
        exit(1);
    }
    /* create encoder context */
    enc_context = avcodec_alloc_context3(codec);
    if (enc_context == nullptr) {
        cerr << "could not allocate video codec context" << endl;
        exit(1);
    }
    /* set parameters */
    //enc_context->bit_rate = 400000;
    /* resolution must be a multiple of two */
    enc_context->width = 640;
    enc_context->height = 360;
    /* frames per second */
    enc_context->time_base = { 1, 30 };
    enc_context->framerate = { 30, 1 };
    /* emit one intra frame every gop_size frames
     * check frame pict_type before passing frame
     * to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
     * then gop_size is ignored and the output of encoder
     * will always be I frame irrespective to gop_size
     */
    enc_context->gop_size = 30;
    enc_context->pix_fmt = AV_PIX_FMT_YUV420P;
    // when flags = 0, SPS & PPS will be generated for each IDR
    enc_context->flags = 0;
    //enc_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    //enc_context->flags2 |= AV_CODEC_FLAG2_LOCAL_HEADER;
    if (codec->id == AV_CODEC_ID_H264) {
        enc_context->max_b_frames = 0;
        av_opt_set(enc_context->priv_data, "preset", "veryfast", 0);
    }
    /* open it */
    if ((ret = avcodec_open2(enc_context, codec, nullptr)) < 0) {
        cerr << "could not open codec" << endl;
        //fprintf(stderr, "Could not open codec: %s\n", av_err2str(ret));
        exit(1);
    }
    AVFrame* frame = nullptr;
    frame = av_frame_alloc();
    if (frame == nullptr) {
        cerr << "could not allocate video frame" << endl;
        exit(1);
    }
    frame->format = enc_context->pix_fmt;
    frame->width = enc_context->width;
    frame->height = enc_context->height;
    if (av_frame_get_buffer(frame, 0) < 0) {
        cerr << "could not allocate video frame data" << endl;
        exit(1);
    }
    printf("frame->linesize[0] = %d\n", frame->linesize[0]);
    printf("frame->linesize[1] = %d\n", frame->linesize[1]);
    printf("frame->linesize[2] = %d\n", frame->linesize[2]);
    AVPacket* pkt = nullptr;
    pkt = av_packet_alloc();
    if (pkt == nullptr)
        exit(1);
    /* encode 100 frames (a bit over 3 seconds at 30 fps) */
    int x, y;
    for (int i = 0; i < 100; ++i) {
        fflush(stdout);
        /* Make sure the frame data is writable.
           On the first round, the frame is fresh from av_frame_get_buffer()
           and therefore we know it is writable.
           But on the next rounds, encode() will have called
           avcodec_send_frame(), and the codec may have kept a reference to
           the frame in its internal structures, that makes the frame
           unwritable.
           av_frame_make_writable() checks that and allocates a new buffer
           for the frame only if necessary.
        */
        if (av_frame_make_writable(frame) < 0) {
            cerr << "frame is not writable" << endl;
            exit(1);
        }
        /* Y */
        for (y = 0; y < enc_context->height; y++) {
            for (x = 0; x < enc_context->width; x++) {
                frame->data[0][y * frame->linesize[0] + x] = x + y + i * 3;
            }
        }
        /* Cb and Cr */
        for (y = 0; y < enc_context->height / 2; y++) {
            for (x = 0; x < enc_context->width / 2; x++) {
                frame->data[1][y * frame->linesize[1] + x] = 128 + y + i * 2;
                frame->data[2][y * frame->linesize[2] + x] = 64 + x + i * 5;
            }
        }
        frame->pts = i; // presentation time stamp
        /* encode the frame */
        encode(enc_context, frame, pkt);
    }
    /* flush the encoder */
    cout << "start flush encoder..." << endl;
    encode(enc_context, nullptr, pkt);
    avcodec_free_context(&enc_context);
    av_frame_free(&frame);
    av_packet_free(&pkt);
    return 0;
}
static void decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt)
{
    int ret;
    // send packet to decoder
    ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret < 0) {
        fprintf(stderr, "Error sending a packet for decoding %d\n", ret);
        exit(1);
    }
    while (ret >= 0) {
        // receive frame
        ret = avcodec_receive_frame(dec_ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            return;
        }
        else if (ret < 0) {
            fprintf(stderr, "Error during decoding\n");
            exit(1);
        }
        printf("dec_ctx->frame %3d, key %d, type %d\n",
               dec_ctx->frame_number,
               frame->key_frame,
               frame->pict_type);
        fflush(stdout);
    }
}
int decode_main()
{
    const AVCodec* codec = nullptr;
    AVCodecParserContext* parser = nullptr;
    AVCodecContext* dec_context = nullptr;
    AVFrame* frame = nullptr;
    char inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE]{};
    char* data = nullptr;
    size_t data_size;
    int ret;
    int eof;
    AVPacket* pkt = nullptr;
    pkt = av_packet_alloc();
    if (pkt == nullptr)
        exit(1);
    /* find the video decoder */
    codec = avcodec_find_decoder(AV_CODEC_ID_H264);
    if (!codec) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }
    parser = av_parser_init(codec->id);
    if (!parser) {
        fprintf(stderr, "parser not found\n");
        exit(1);
    }
    dec_context = avcodec_alloc_context3(codec);
    if (!dec_context) {
        fprintf(stderr, "Could not allocate video codec context\n");
        exit(1);
    }
    /* open it */
    if (avcodec_open2(dec_context, codec, nullptr) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }
    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }
    ifstream fin{ fname, ios::binary };
    if (!fin.is_open()) {
        cerr << "open input file error" << endl;
        exit(1);
    }
    while (1) {
        fin.read(inbuf, INBUF_SIZE);
        if (fin.bad()) {
            cout << "read file error" << endl;
            break;
        }
        data_size = fin.gcount();
        eof = !data_size;
        data = inbuf;
        while (data_size > 0 || eof) {
            ret = av_parser_parse2(parser, dec_context, &pkt->data, &pkt->size,
                                   (const uint8_t*)data, (int)data_size,
                                   AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
            if (ret < 0) {
                fprintf(stderr, "Error while parsing\n");
                exit(1);
            }
            data += ret;
            data_size -= ret;
            if (pkt->size)
                decode(dec_context, frame, pkt);
            if (eof)
                break;
        }
        if (eof)
            break;
    }
    /* flush the decoder */
    cout << "flush decoder..." << endl;
    decode(dec_context, frame, nullptr);
    fin.close();
    av_parser_close(parser);
    avcodec_free_context(&dec_context);
    av_frame_free(&frame);
    av_packet_free(&pkt);
    return 0;
}
int main(void) {
    encode_main();
    decode_main();
    return 0;
}
Tested and working.
In the code above, av_parser_parse2 is called one more time when data_size is 0; this is to obtain the last frame. At that point pkt->size is greater than 0 and decode is called.
To split a bitstream into individual NAL units, it is really only necessary to look for the \x00\x00\x01 delimiter. That makes the av_parser_parse2 interface unnecessary: avcodec_send_packet and avcodec_receive_frame, as wrapped in decode() above, can be called directly. For a network application, might this design be better? It is clearer and more direct, but it requires a private header to separate the NAL units.
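As an illustration of such a private header, here is a minimal sketch (not part of the original code) that frames each encoded packet with a 4-byte big-endian length field. The writer side would replace the file writing in encode(), and the reader side hands each recovered packet straight to the decode() helper above. The function names and the 4-byte length format are assumptions chosen only for illustration.
#include <vector>   // needed for the reader's buffer
// hypothetical writer side: prepend a 4-byte big-endian length to each packet
static void write_framed_packet(ofstream& out, const AVPacket* pkt) {
    uint8_t hdr[4] = {
        (uint8_t)(pkt->size >> 24), (uint8_t)(pkt->size >> 16),
        (uint8_t)(pkt->size >> 8),  (uint8_t)(pkt->size)
    };
    out.write((const char*)hdr, 4);
    out.write((const char*)pkt->data, pkt->size);
}
// hypothetical reader side: read one length-prefixed packet into buf,
// return false on a clean end of stream
static bool read_framed_packet(ifstream& in, std::vector<uint8_t>& buf) {
    uint8_t hdr[4];
    if (!in.read((char*)hdr, 4))
        return false;
    int size = (hdr[0] << 24) | (hdr[1] << 16) | (hdr[2] << 8) | hdr[3];
    buf.resize(size);
    return bool(in.read((char*)buf.data(), size));
}
The decode loop then becomes a plain read-and-send loop, without av_parser_parse2:
std::vector<uint8_t> buf;
while (read_framed_packet(fin, buf)) {
    pkt->data = buf.data();
    pkt->size = (int)buf.size();
    decode(dec_context, frame, pkt);
}
decode(dec_context, frame, nullptr); // flush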
Below is a piece of code I once wrote in Python:
import re

def get_each_nal(fn):
    """ yield nal unit including header and rbsp """
    ibs = b''
    with open(fn, 'rb') as f:
        while ob := f.read(1):
            ibs += ob
            if ibs[-3:] == b'\x00\x00\x01':
                cont = b''
                while ob := f.read(1):
                    cont += ob
                    if (cont[-3:] == b'\x00\x00\x00' or
                            cont[-3:] == b'\x00\x00\x01'):
                        # discard emulation_prevention_three_byte 0x03
                        cont = re.sub(rb'\x00\x00\x03', b'\x00\x00', cont[:-3])
                        # nal last byte should not be 0x00
                        # assert cont[-1] != 0
                        yield cont
                        # one byte back in case the next is 0x01
                        f.seek(f.tell()-1)
                        ibs = b'\x00\x00'
                        break
                else:
                    #print(cont)
                    cont = re.sub(rb'\x00\x00\x03', b'\x00\x00', cont)
                    #assert cont[-1] != 0
                    yield cont  # last nal unit, do not use return!
Byte strings and regular expressions
This was probably a waste of time: I wanted to try matching the NAL unit delimiter with a regular expression. In Python this is very natural, but in C++ it feels awkward, because there is no direct counterpart of Python's bytes object, only raw char data. On reflection, the idea itself is questionable. Regular expressions have many special symbols, such as \w \d \s . * ? ^ $ and so on, and the point of using a regex is to express a string pattern clearly and precisely with those symbols. If none of them are needed, there is no need for a regex at all. Matching the NAL unit delimiter is exactly this kind of case, where no regex-specific symbols are required.
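For the record, matching the three-byte delimiter in C++ needs nothing more than std::search over a byte buffer; std::vector<uint8_t> (or std::string) plays the role of Python's bytes here. A small sketch, with the function name chosen only for illustration:
#include <algorithm>
#include <cstdint>
#include <vector>
// return the offset of the next 00 00 01 start code at or after pos
// (pos must be <= buf.size()), or buf.size() if there is none
static size_t find_start_code(const std::vector<uint8_t>& buf, size_t pos) {
    static const uint8_t sc[3] = { 0x00, 0x00, 0x01 };
    auto it = std::search(buf.begin() + pos, buf.end(), std::begin(sc), std::end(sc));
    return (size_t)(it - buf.begin());
}
Calling it repeatedly and slicing the buffer between consecutive hits splits the stream into NAL units, with no regular expression involved.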
Working without av_parser_parse2
If each packet is saved to its own file during encoding, decoding can simply read those files back one by one. The decode part of the code can then be written like this (only the changed part is shown):
int index = 0;
int length = 0;
char* pcont = nullptr;
ifstream fin;
string ffname{};
while (index < 50) {
    ffname.clear();
    ffname += fname + '.' + to_string(index);
    fin.open(ffname, ios::binary);
    if (!fin.is_open()) {
        cerr << "open " << ffname << " error" << endl;
        break;
    }
    fin.seekg(0, ios::end);
    length = (int)fin.tellg();
    fin.seekg(0, ios::beg);
    pcont = new char[length] {};
    fin.read(pcont, length);
    if (fin.bad()) {
        cerr << "read " << ffname << " error" << endl;
        break;
    }
    pkt->data = (uint8_t*)pcont;
    pkt->size = length;
    decode(dec_context, frame, pkt);
    delete[] pcont;   // array delete to match new[]
    pcont = nullptr;
    fin.close();
    index += 1;
}
cout << "flush decoder..." << endl;
decode(dec_context, frame, nullptr);
index counts the files generated by the encode part of the code. Decoding was tested and the printed output looks correct. The code does not handle the case where new fails.
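The corresponding change on the encode side is not shown above. A minimal sketch of what the packet-writing part of encode() might look like, assuming a static counter and the out.bin.N naming expected by the reader loop above (both are assumptions for illustration):
// inside the receive loop of encode(), write each packet to its own file
static int pkt_index = 0;   // hypothetical per-run packet counter
string ffname = fname + '.' + to_string(pkt_index++);
ofstream fout{ ffname, ios::binary };
if (!fout.is_open()) {
    cerr << "open " << ffname << " in encode() error" << endl;
    exit(1);
}
fout.write((const char*)pkt->data, pkt->size);
fout.close();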
Pitfalls
The valid preset and tune values for libx264 can be checked with:
$ x264 --fullhelp
Reference code:
// context setting should be done before open
context->width = width;
context->height = height;
frame_size = width * height;
context->time_base = { 1, fps };
context->framerate = { fps, 1 };
context->gop_size = gop;
context->pix_fmt = pix_fmt;
context->flags = 0; // generate SPS and PPS for each IDR
// preset is only fast, tune can also be zerolatency
av_opt_set(context->priv_data, pset_str.c_str(), pset_val.c_str(), 0);
if (codec->id == AV_CODEC_ID_H264) {
    av_opt_set(context->priv_data, "tune", "film,zerolatency", 0);
}
if ((avcodec_open2(context, codec, nullptr)) < 0) {
    avcodec_free_context(&context);
    throw string{ "open named-encoder error" };
}
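One more C++-specific pitfall, and presumably why the av_err2str call is commented out in encode_main above: av_err2str is a macro built around a C99 compound literal, which does not compile as C++. A small substitute, only a sketch, is to call av_strerror directly (the helper name av_errstr is my own):
#include <string>
extern "C" {
#include <libavutil/error.h>
}
// C++-friendly replacement for the av_err2str macro
static std::string av_errstr(int errnum) {
    char buf[AV_ERROR_MAX_STRING_SIZE]{};
    av_strerror(errnum, buf, sizeof(buf)); // writes a readable message into buf
    return std::string{ buf };
}
With it, the error branch can print something like: cerr << "could not open codec: " << av_errstr(ret) << endl;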
Link to this article: https://cs.pynote.net/ag/image/202209246/