实际上只是一个头文件h264_util.h。h264_util.h封装了x264库。我做的工作就是使用模板封装了一下,保证使用任何mp4 container库都可以方便地输出mp4文件。使用模板的目的是为了避免任何回调的开销。
h264_util.h依赖于x264库。
main.cpp依赖于h264_util.h、libmp4v2(可选)和libmp4av(可选),后两者都是mpeg4ip的一部分。使用mpeg4ip是为了生成QuickTime 7可播放的mp4文件(相信我,这点很难,我试了很多库。原因不是这些库不好,而是QT7太糟糕)。
附带说明一下,所用到的库和我写的代码都可以同时在Windows下和Linux下编译运行(我已经都试过了)。要在Windows下编译x264很简单,参考说明文档就可以。麻烦的是mpeg4ip,其文档声称不再对Windows下的兼容性负责了。要在Windows下编译并使用mpeg4ip,有两个办法:或者自己提供gettimeofday的定义,或者使用其libmissing(和libmissing60是一回事,60表示在VC6下编译通过,实际上我用VC7也编译运行过了)。
如果同时使用x264和mpeg4ip,又会发现一些兼容性问题。问题的关键在于Windows操作系统下默认没有提供int8_t之类的类型定义,于是mpeg4ip会自己提供一个头文件(mpeg4ip_win32.h,即定义了宏__MPEG4IP_WIN32_H__的那个头文件)补上这些定义。所有跨平台的开源项目似乎都会自说自话地为Windows平台提供这些定义(Linux下有标准头文件),如果同时使用两个这样的开源项目,就可能产生类型重复定义的编译错误。x264是个特例,它的libx264根本没有提供这些定义,所以我在h264_util.h中补上了这些定义。又由于这些类型定义可能和mpeg4ip头文件中的定义冲突,所以我在h264_util.h中预先加入了用于避免冲突的宏判断。要让这些宏起作用,mpeg4ip的头文件必须在h264_util.h之前被#include。
要编译x264,必须安装nasm(一种汇编语言编译器),mpeg4ip如果是只编译上文中的两个库的话,不需要,否则也需要nasm。
下面就是源代码,包括两个源代码文件,h264_util.h和main.cpp和一个makefile(仅用于linux),GNUmakefile。
h264_util.h封装了x264,main.cpp提供了两个测试例子,一个仅使用h264_util.h演示了如何进行视频编码,另一个测试例子说明了如何生成mp4。test_h264_utilities是在内存中快速生成rgb888的raw image的工具,仅仅是为了我测试方便用,你完全可以不使用它。
使用方法很简单(仅限于Linux,Windows下懒得写了):安装libx264、libmp4av、libmp4v2(./configure;make;make install总会吧?安装完整的mpeg4ip软件包也许会有问题,但是只安装库是没问题的:先在源代码包的根目录下./configure,然后在对应的库代码目录里make;make install),把下述三个文件拷贝到同一个目录里,然后make clean;make;./test_main,就会生成一个叫7.mp4的文件。
//h264_util.h
#ifndef __H264_UTIL_H__
#define __H264_UTIL_H__
#include <cassert>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Fixed-width integer types (int8_t ... uint64_t) needed by x264.h.
 *
 * Selection order:
 *   1. <inttypes.h> when the build defines HAVE_INTTYPES_H,
 *   2. <stdint.h>   when the build defines HAVE_STDINT_H,
 *   3. otherwise hand-written typedefs, unless __MPEG4IP_WIN32_H__ is defined.
 */
#if (defined HAVE_INTTYPES_H)
#include <inttypes.h>
#elif (defined HAVE_STDINT_H)
#include <stdint.h>
#else
/*
 * __MPEG4IP_WIN32_H__ is presumably the include guard of an mpeg4ip header that
 * already declares these types (TODO confirm against mpeg4ip); when it is set the
 * typedefs below are skipped to avoid redefinition errors. For this test to work
 * the mpeg4ip headers must be included before this file.
 */
#ifndef __MPEG4IP_WIN32_H__
/* NOTE(review): these assume 8-bit char, 16-bit short and 32-bit int. */
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
#if (defined WIN32)||(defined _WIN32)
/* Windows compilers: use the __int64 extension for the 64-bit types. */
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
#else //other OS
typedef signed long long int64_t;
typedef unsigned long long uint64_t;
#endif
#endif
#endif
#include "x264.h"
#ifdef __cplusplus
}
#endif
// Default container policy for h264_utilities: discards everything.
// A replacement policy has to provide the same two static functions.
struct ContainerTraits
{
    // Called for each encoded NAL unit. The return value is added to the
    // encoder's write position ("inserter") before the next NAL unit is
    // serialised with x264_nal_encode(_bit_buffer + inserter, ...).
    // This default keeps nothing and therefore reports 0.
    static int write_nalu(unsigned char *nalu, int size)
    {
        (void)nalu;
        (void)size;
        return 0;
    }

    // Called at the end of a picture with the encoded bit buffer (raw H.264
    // data) and the number of bytes retained in it. This default ignores both.
    static void set_eop(unsigned char *bit_buffer, int size)
    {
        (void)bit_buffer;
        (void)size;
    }
};
template <typename CTraits=ContainerTraits>
class h264_utilities
{
public:
h264_utilities(int width, int height):_h(0),_bit_buffer(0),_bit_buffer_capacity(1024*256)
{
assert(width%16==0);
assert(height%16==0);
x264_param_default(&_param);
_param.i_width = width;
_param.i_height= height;
_h=x264_encoder_open(&_param);
assert(_h);
//http://www.via.ecp.fr/via/ml/vlc-devel/2005-02/msg00371.html or google "x264_encoder_encode"
//if we do not allocate the new buffer ,
//the code "RGB_TO_I420( rgb_to_i420, 0, 1, 2, 3 );" will raise segment fault
x264_picture_alloc(&_pic,X264_CSP_RGB,_param.i_width ,_param.i_height);
// Do not force any parameters
_pic.i_type = X264_TYPE_AUTO;
_pic.i_qpplus1 = 0;
_bit_buffer_capacity=(_bit_buffer_capacity>_param.i_width*_param.i_height*4? _bit_buffer_capacity:_param.i_width*_param.i_height*4);
if (!_bit_buffer)
_bit_buffer = (unsigned char*)av_malloc(_bit_buffer_capacity);
}
~h264_utilities()
{
//assert(0);
if(_bit_buffer)
{
av_free(_bit_buffer);
_bit_buffer=0;
}
x264_encoder_close(_h);
}
//only accept row images (rgb888)
bool compress(unsigned char* frame)
{
memcpy(reinterpret_cast<void*>(_pic.img.plane[0]), reinterpret_cast<void*>(frame),_param.i_width*_param.i_height*3);
if(x264_encoder_encode(_h,&_nals,&_nnal,&_pic,&_pic_out)<0)
{
fprintf(stderr, "x264_encoder_encode failed\n" );
assert(0);
return false;
}
assert(_nnal);
int inserter=0;
for(int b = 0; b < _nnal; b++ )
{
int size;
if( ( size = x264_nal_encode( _bit_buffer+inserter, &_bit_buffer_capacity, 1, _nals+b ) ) < 0 )
{
fprintf( stderr, "need to increase buffer size (size=%d)\n", -size );
assert(0);
return false;
}
//nal unit start from
inserter+=CTraits::write_nalu(_bit_buffer+inserter,size);
}
if(_nnal)
CTraits::set_eop(_bit_buffer,inserter);
return true;
}
public:
x264_picture_t _pic;
x264_picture_t _pic_out;
x264_param_t _param;
x264_nal_t *_nals;
int _nnal;
x264_t *_h;
unsigned char * _bit_buffer;
int _bit_buffer_capacity;
public:
static void *av_malloc(unsigned int size)
{
static void *ptr;
#ifdef MEMALIGN_HACK
int diff;
#endif
/* lets disallow possible ambiguous cases */
if(size > INT_MAX)
return 0;
#ifdef MEMALIGN_HACK
ptr = malloc(size+16+1);
diff= ((-(int)ptr - 1)&15) + 1;
ptr += diff;
((char*)ptr)[-1]= diff;
#elif defined (HAVE_MEMALIGN)
ptr = memalign(16,size);
// Why 64? Indeed, we should align it:
// on 4 for 386
// on 16 for 486
// on 32 for 586, PPro - k6-III
// on 64 for K7 (maybe for P3 too).
// Because L1 and L2 caches are aligned on those values.
// But I don't want to code such logic here!
/* Why 16?
because some cpus need alignment, for example SSE2 on P4, & most RISC cpus
it will just trigger an exception and the unaligned load will be done in the
exception handler or it will just segfault (SSE2 on P4)
Why not larger? because i didnt see a difference in benchmarks ...
*/
/* benchmarks with p3
memalign(64)+1 3071,3051,3032
memalign(64)+2 3051,3032,3041
memalign(64)+4 2911,2896,2915
memalign(64)+8 2545,2554,2550
memalign(64)+16 2543,2572,2563
memalign(64)+32 2546,2545,2571
memalign(64)+64 2570,2533,2558
btw, malloc seems to do 8 byte alignment by default here
*/
#else
ptr = malloc(size);
#endif
return ptr;
}
/**
* av_realloc semantics (same as glibc): if ptr is 0 and size > 0,
* identical to malloc(size). If size is zero, it is identical to
* free(ptr) and 0 is returned.
*/
static void *av_realloc(void *ptr, unsigned int size)
{
#ifdef MEMALIGN_HACK
int diff;
#endif
/* lets disallow possible ambiguous cases */
if(size > INT_MAX)
return 0;
#ifdef MEMALIGN_HACK
//FIXME this isnt aligned correctly though it probably isnt needed
if(!ptr) return av_malloc(size);
diff= ((char*)ptr)[-1];
return realloc(ptr - diff, size + diff) + diff;
#else
return realloc(ptr, size);
#endif
}
/* NOTE: ptr = 0 is explicetly allowed */
static void av_free(void *ptr)
{
/* XXX: this test should not be needed on most libcs */
if (ptr)
#ifdef MEMALIGN_HACK
free(ptr - ((char*)ptr)[-1]);
#else
free(ptr);
#endif
}
public:
void debug_dump(void)
{
//debug parameters
fprintf(stderr,"dump parameters now:\n");
fprintf(stderr,"cpu=%d\n",_param.cpu);
fprintf(stderr,"i_threads=%d\n",_param.i_threads);
fprintf(stderr,"i_width=%d\n",_param.i_width);
fprintf(stderr,"i_height=%d\n",_param.i_height);
fprintf(stderr,"i_csp=%d\n",_param.i_csp);
fprintf(stderr,"i_level_idc=%d\n",_param.i_level_idc);
fprintf(stderr,"vui.i_sar_height=%d\n",_param.vui.i_sar_height);
fprintf(stderr,"vui.i_sar_width=%d\n",_param.vui.i_sar_width);
fprintf(stderr,"i_fps_num=%d\n",_param.i_fps_num);
fprintf(stderr,"i_fps_den=%d\n",_param.i_fps_den);
fprintf(stderr,"i_frame_reference=%d\n",_param.i_frame_reference);
fprintf(stderr,"i_keyint_max=%d\n",_param.i_keyint_max);
fprintf(stderr,"i_keyint_min=%d\n",_param.i_keyint_min);
fprintf(stderr,"i_scenecut_threshold=%d\n",_param.i_scenecut_threshold);
fprintf(stderr,"i_bframe=%d\n",_param.i_bframe);
fprintf(stderr,"psz_cqm_file=%s\n",_param.psz_cqm_file);
}
};
/**
 * Test helper: builds FRAMES raw RGB888 images of WIDTH x HEIGHT pixels in one
 * contiguous heap buffer and prints them to stdout as a 0/1 map.
 *
 * Every image is black except for one quadrant whose red channel is 255; the
 * quadrant rotates with the frame number (* is a red pixel, - is a black pixel):
 *   frame1 frame2 frame3 frame4
 *   **--   --**   ----   ----
 *   **--   --**   ----   ----
 *   ----   ----   --**   **--
 *   ----   ----   --**   **--
 * The "left"/"top" halves include the middle column/row (x <= WIDTH/2,
 * y <= HEIGHT/2).
 *
 * If the dimensions are not positive or the buffer cannot be allocated, FRAMES
 * is set to 0 so that loops over the frames do nothing.
 */
class test_h264_utilities{
public:
    test_h264_utilities(int w,int h,int f):WIDTH(w),HEIGHT(h),FRAMES(f),buf(0)
    {
        const bool valid = (w > 0 && h > 0 && f > 0);
        if (valid)
        {
            // calloc: the pixels that are not painted red must really be black
            // (zero), and the element-count form guards the multiplication.
            buf = (unsigned char*)calloc((size_t)f, (size_t)w*(size_t)h*3);
        }
        if (!buf)
        {
            if (valid)
                fprintf(stderr,"test_h264_utilities: cannot allocate %d frames of %dx%d\n",f,w,h);
            FRAMES = 0;
        }
        produce_images();
        dump_images();
    }

    ~test_h264_utilities()
    {
        free(buf);
    }

    /** Returns the first byte of frame j (0-based); each frame is WIDTH*HEIGHT*3 bytes. */
    unsigned char* frame(int j)
    {
        return buf + (size_t)j*HEIGHT*WIDTH*3;
    }

private:
    /** Paints the red quadrant of every frame; all other bytes stay zero. */
    void produce_images()
    {
        for (int j = 0; j < FRAMES; j++)
        {
            unsigned char* image = frame(j);
            const int quadrant = j%4;
            for (int y = 0; y < HEIGHT; y++)
            {
                const bool top = (y <= HEIGHT/2);
                for (int x = 0; x < WIDTH; x++)
                {
                    const bool left = (x <= WIDTH/2);
                    bool red = false;
                    switch (quadrant)
                    {
                    case 0: red = left && top;   break;  // top-left
                    case 1: red = !left && top;  break;  // top-right
                    case 2: red = !left && !top; break;  // bottom-right
                    case 3: red = left && !top;  break;  // bottom-left
                    default:
                        assert(0);
                    }
                    if (red)
                        image[y*WIDTH*3 + x*3] = 255;
                }
            }
        }
    }

    /** Prints every frame to stdout: 1 for a pixel whose red channel is 255, else 0. */
    void dump_images()
    {
        for (int j = 0; j < FRAMES; j++)
        {
            //debug, before writing the real file, let's dump it on screen
            fprintf(stdout,"image %d:\n",j);
            for (int y = 0; y < HEIGHT; y++)
            {
                const unsigned char* row = frame(j) + y*WIDTH*3;
                for (int x = 0; x < WIDTH; x++)
                    fprintf(stdout,"%d",row[x*3]==255 ? 1 : 0);
                fprintf(stdout,"\n");
            }
        }
    }

    // Not copyable: a copy would free buf twice. Declared, intentionally undefined.
    test_h264_utilities(const test_h264_utilities&);
    test_h264_utilities& operator=(const test_h264_utilities&);

public:
    int WIDTH;          // image width in pixels
    int HEIGHT;         // image height in pixels
    int FRAMES;         // number of usable frames in buf (0 if allocation failed)
    unsigned char* buf; // FRAMES consecutive RGB888 images, owned by this object
};
#endif //__H264_UTIL_H__
//main.cpp
#include <assert.h>
#include <stdio.h>
#include <string.h>
#if 1
#include "mp4.h"
#include "mp4av_h264.h" //not included in mp4.h
//--------------missing declaration -------begin
//not in mp4av_h264.h or mp4av.h :(
/*
 * Bookkeeping used by the Dpb* helpers of MP4Container to derive, for every
 * frame added with DpbAdd(), the order in which frames leave a small
 * reordering buffer (lowest picture order count first).
 */
typedef struct
{
/* Entries that were added but not yet emitted by DpbUpdate(). */
struct
{
int size_min; /* largest buffer position an emitted entry was ever taken from */
int next;     /* index given to the next added entry; incremented by DpbAdd() */
int cnt;      /* number of valid entries in idx[] / poc[] */
int idx[17];  /* index (value of 'next' at insertion time) of each pending entry */
int poc[17];  /* picture order count supplied for each pending entry */
} dpb;
int cnt;      /* number of entries emitted so far */
int cnt_max;  /* number of ints currently allocated for 'frame' */
int *frame;   /* heap array: frame[idx] = emission position of entry idx, -1 if unset */
} h264_dpb_t;
//--------------missing declaration -------end
struct MP4Container
{
//interface
static int write_nalu(unsigned char* nalu, int size)
{
int32_t poc = 0;
unsigned char type=nalu[4] & 0x1f;
samplesWritten++;
switch(type)
{
//sps
case 0x7:
//ISO 14496-10 (Video), Advanced Video Coding (AVC), also known as H.264
memset(&h264_dec, 0, sizeof(h264_dec));
if (h264_read_seq_info(nalu, size, &h264_dec) == -1)
{
assert(0);
fprintf(stderr, "Could not decode Sequence header\n");
return 0;
}
trackId=MP4AddH264VideoTrack(mp4File,
Mp4TimeScale, //time scale,
mp4FrameDuration(),
h264_dec.pic_width,
h264_dec.pic_height,
nalu[5], //profile (baseline profile, main profile, etc. see
nalu[6], //compatible profile
nalu[7], //levels
3);
// doesn't get added to sample buffer
// remove header
MP4AddH264SequenceParameterSet(mp4File, trackId,
nalu + 4,
size - 4);
return 0;
break;
//pps
case 0x8:
// doesn't get added to sample buffer
MP4AddH264PictureParameterSet(mp4File, trackId,
nalu + 4,
size - 4);
return 0;
break;
//filler data
case 0xc:
// doesn't get copied
break;
//access unit
//sei
case 0x6:
case 0x9:
// note - may not want to copy this - not needed
default:
if(h264_nal_unit_type_is_slice(type))
{
//slice type
slice_is_idr = h264_dec.nal_unit_type == 0x5; //type idr slice
poc = h264_dec.pic_order_cnt;
nal_is_sync = h264_slice_is_idr(&h264_dec);
}
nalu[0] = ((size-4) >> 24) & 0xff;
nalu[1] = ((size-4) >> 16) & 0xff;
nalu[2] = ((size-4) >> 8) & 0xff;
nalu[3] = ((size-4) >> 0) & 0xff;
return size;
}//end of switch
return 0;
}
static void set_eop(unsigned char* bit_buffer, int size)
{
samplesWritten++;
double thiscalc;
thiscalc = samplesWritten;
thiscalc *= Mp4TimeScale;
thiscalc /= VideoFrameRate;
thisTime = (MP4Duration)thiscalc;
MP4Duration dur;
dur = thisTime - lastTime;
if(!MP4WriteSample(mp4File,trackId,bit_buffer,size,dur,0,nal_is_sync))
{
fprintf(stderr,"can't write video frame\n");
MP4DeleteTrack(mp4File, trackId);
return;
}
lastTime = thisTime;
DpbAdd(&h264_dpb, h264_dec.pic_order_cnt, slice_is_idr);
return;
DpbFlush(&h264_dpb);
if (h264_dpb.dpb.size_min > 0)
{
unsigned int ix;
for (ix = 0; ix < samplesWritten; ix++)
{;
const int offset = DpbFrameOffset(&h264_dpb, ix);
//fprintf( stderr, "dts=%d pts=%d offset=%d\n", ix, ix+offset, offset );
MP4SetSampleRenderingOffset(mp4File, trackId, 1 + ix, offset*mp4FrameDuration());
}
}
DpbClean(&h264_dpb);
}
//not interface, can be deleled safely
static MP4FileHandle mp4File;
//consts
static uint32_t Mp4TimeScale;
static double VideoFrameRate;
static uint32_t mp4FrameDuration()
{
return (u_int32_t)(((double)Mp4TimeScale) / VideoFrameRate);
}
static MP4SampleId samplesWritten;
static uint8_t *nal_buffer;
static uint32_t nal_buffer_size;
static uint32_t nal_buffer_size_max;
static MP4Timestamp lastTime;
static MP4Timestamp thisTime;
static MP4TrackId trackId;
static bool nal_is_sync;
static h264_dpb_t h264_dpb;
static h264_decode_t h264_dec;
static bool slice_is_idr;
//some tools
static void DpbInit( h264_dpb_t *p )
{
p->dpb.cnt = 0;
p->dpb.next = 0;
p->dpb.size_min = 0;
p->cnt = 0;
p->cnt_max = 0;
p->frame = NULL;
}
static void DpbClean( h264_dpb_t *p )
{
free( p->frame );
}
static void DpbUpdate( h264_dpb_t *p, int is_forced )
{
int i;
int pos;
if (!is_forced && p->dpb.cnt < 16)
return;
/* find the lowest poc */
pos = 0;
for (i = 1; i < p->dpb.cnt; i++)
{
if (p->dpb.poc[i] < p->dpb.poc[pos])
pos = i;
}
//fprintf( stderr, "lowest=%d\n", pos );
/* save the idx */
if (p->dpb.idx[pos] >= p->cnt_max)
{
int inc = 1000 + (p->dpb.idx[pos]-p->cnt_max);
p->cnt_max += inc;
p->frame = (int*)realloc( p->frame, sizeof(int)*p->cnt_max );
for (i=0;i<inc;i++)
p->frame[p->cnt_max-inc+i] = -1; /* To detect errors latter */
}
p->frame[p->dpb.idx[pos]] = p->cnt++;
/* Update the dpb minimal size */
if (pos > p->dpb.size_min)
p->dpb.size_min = pos;
/* update dpb */
for (i = pos; i < p->dpb.cnt-1; i++)
{
p->dpb.idx[i] = p->dpb.idx[i+1];
p->dpb.poc[i] = p->dpb.poc[i+1];
}
p->dpb.cnt--;
}
static void DpbFlush( h264_dpb_t *p )
{
while (p->dpb.cnt > 0)
DpbUpdate( p, true );
}
static void DpbAdd( h264_dpb_t *p, int poc, int is_idr )
{
if (is_idr)
DpbFlush( p );
p->dpb.idx[p->dpb.cnt] = p->dpb.next;
p->dpb.poc[p->dpb.cnt] = poc;
p->dpb.cnt++;
p->dpb.next++;
DpbUpdate( p, false );
}
static int DpbFrameOffset( h264_dpb_t *p, int idx )
{
if (idx >= p->cnt)
return 0;
if (p->frame[idx] < 0)
return p->dpb.size_min; /* We have an error (probably broken/truncated bitstream) */
return p->dpb.size_min + p->frame[idx] - idx;
}
};
// Definitions of MP4Container's static state. Every value is spelled out
// here; the ones written as 0/false match what static storage gets by default.
MP4FileHandle MP4Container::mp4File             = 0;
uint32_t      MP4Container::Mp4TimeScale        = 90000;
// See http://www.via.ecp.fr/via/ml/x264-devel/2005-07/msg00056.html
// (original note: the latest x264 library is probably needed to test this).
double        MP4Container::VideoFrameRate      = 25*2;
MP4SampleId   MP4Container::samplesWritten      = 0;
uint8_t*      MP4Container::nal_buffer          = 0;
uint32_t      MP4Container::nal_buffer_size     = 0;
uint32_t      MP4Container::nal_buffer_size_max = 0;
MP4Timestamp  MP4Container::lastTime            = 0;
MP4Timestamp  MP4Container::thisTime            = 0;
MP4TrackId    MP4Container::trackId             = 0;
bool          MP4Container::nal_is_sync         = false;
h264_dpb_t    MP4Container::h264_dpb;   // zero-initialised (static storage)
h264_decode_t MP4Container::h264_dec;   // zero-initialised (static storage)
bool          MP4Container::slice_is_idr        = false;
#endif
//h264_util.h must be included after the mpeg4ip headers: those headers typedef their own int8_t-style types, and h264_util.h only skips its fallback typedefs when the mpeg4ip ones are already visible
#include "h264_util.h"
int main(void)
{
#if 1
{
//width, heigth, frames
int width=192;
int height=128;
int frames=25;
test_h264_utilities images(width,height,frames);
MP4FileHandle mp4File=MP4Create("7.mp4",1,0);
if (mp4File)
{
MP4SetTimeScale(mp4File, MP4Container::Mp4TimeScale);
}
MP4Container::mp4File=mp4File;
h264_utilities<MP4Container> h264(width,height);
for(int i=0;i<images.FRAMES;i++)
{
h264.compress(images.frame(i));
}
MP4Close(mp4File);
}
#endif //testcase1
#if 1
{
//width, height,frames
test_h264_utilities images(16,16,4);
h264_utilities<> h264(16,16);
for(int i=0;i<images.FRAMES;i++)
{
h264.compress(images.frame(i));
}
}
#endif //testcase2, test x264 (encapsulateed in a h264_util.h)
return 0;
}
//GNUmakefile
# GNU makefile for the h264_util test program (Linux only).
# Builds every C/C++ source in this directory into ./test_main.
#
# To get a release build, leave IDL_DEBUG empty (e.g. "make IDL_DEBUG=").
IDL_DEBUG=hi
DEVELOP_ROOT := $(shell cd ../.. && pwd)
#---------------------external libraries (x264, mpeg4ip)------------------begin
# The commented-out values show how to point at a private x264 tree.
LIBX264=#$(DEVELOP_ROOT)/x264
INC_DIRS :=#$(LIBX264)/include
LIBS =mp4v2 mp4av x264
LIB_DIR =
STATIC_LIBS =#$(LIBX264)/lib/libx264.la
SHARED_LIBS =
SHARED_LIB_DIR =
#---------------------external libraries (x264, mpeg4ip)------------------end
# IDL_DEBUG selects the compiler options (debug is the default).
ifdef IDL_DEBUG
OPT =-D_DEBUG -g -O0 -Wall
else
OPT =-O2 -Wall
endif
CC =gcc
CXX =g++
CPPFLAGS +=$(OPT) $(addprefix -I,$(INC_DIRS))
# Search paths go into LDFLAGS, libraries into LDLIBS: the libraries must
# follow the object files on the link line or static / --as-needed links
# cannot resolve the symbols.
ifdef SHARED_LIB_DIR
LDFLAGS +=-Wl,-rpath,$(SHARED_LIB_DIR) -L$(SHARED_LIB_DIR)
endif
LDFLAGS +=$(addprefix -L,$(LIB_DIR))
LDLIBS +=$(addprefix -l,$(SHARED_LIBS)) $(addprefix -l,$(LIBS))
SOURCES :=$(wildcard *.c) $(wildcard *.cc) $(wildcard *.cxx) $(wildcard *.cpp)
OBJS = $(addsuffix .o, $(basename $(SOURCES)))
.PHONY : all clean mkdir targets
all : targets
# create the deployment directory if it does not yet exist
DEPLOYDIR = .
mkdir :
	@mkdir -p $(DEPLOYDIR)
TARGET = $(DEPLOYDIR)/test_main
# libtool is used so that STATIC_LIBS may name .la archives; mode and tag are
# given explicitly instead of relying on libtool guessing them.
$(TARGET) : $(OBJS)
	libtool --tag=CXX --mode=link $(CXX) $(LDFLAGS) $^ $(STATIC_LIBS) $(LDLIBS) -o $@
targets : mkdir $(TARGET)
clean :
	-$(RM) $(OBJS) $(OBJS:.o=.d) core* *.bak $(TARGET) *.mov *.mp4 *.d *.mpg *.mpeg
# common block
# add auto dependency generation to default rule
COMPILE.c += -MMD
COMPILE.cc += -MMD
DFILES := $(OBJS:.o=.d)
ifneq (${MAKECMDGOALS}, clean)
-include $(DFILES)
endif
# end of common block