h.264视频编码加生成QT7可播放的mp4文件

实际上只是一个头文件h264_util.h。h264_util.h封装了x264库。我做的工作就是使用模板封装了一下，保证使用任何mp4 container库都可以方便地输出mp4文件。使用模板的目的是为了避免任何回调的开销。

h264_util.h依赖于x264库。

main.cpp依赖于h264_util.h、libmp4v2（可选）和libmp4av（可选），后两者都是mpeg4ip的一部分。使用mpeg4ip是为了生成Quicktime7可播放的mp4文件（相信我，这点很难的，我试了很多库了。原因不是这些库不好，而是QT7太糟糕）。

附带说明一下，所用到的库和我写的代码都是可以同时在Windows下和Linux下编译运行的（我已经都试过了）。要在Windows下编译x264很简单，参考说明文档就可以。麻烦的是mpeg4ip，其文档声称不再对Windows下的兼容性负责了。要在Windows下编译并使用mpeg4ip，有两个办法：或者自己提供gettimeofday的定义，或者使用其libmissing（和libmissing60是一回事，60表示在vs6下编译通过，实际上我用vc7也编译运行过了）。

如果同时使用x264和mpeg4ip，又会发现一些兼容性问题。问题的关键在于Windows操作系统下默认没有提供int8_t之类的类型定义，于是mpeg4ip会自己提供一个mpeg4ip_version.h，补上这些定义。所有的跨平台的开源项目似乎都会自说自话地为Windows平台（Linux下有标准头文件）提供这些定义，如果我同时使用两个这样的开源项目，就可能会产生类型重复定义的编译错误。x264是个特例，它的libx264根本就没有提供这些定义，所以我在h264_util.h中补上了这些定义。又由于这些类型定义可能和mpeg4ip_version.h中的定义冲突，所以我在h264_util.h中预先加入了用于判断、避免冲突的宏。要让这些宏起作用，mpeg4ip的头文件必须在我的h264_util.h之前被#include。

要编译x264,必须安装nasm（一种汇编语言编译器），mpeg4ip如果是只编译上文中的两个库的话，不需要，否则也需要nasm。

下面就是源代码，包括两个源代码文件，h264_util.h和main.cpp和一个makefile（仅用于linux），GNUmakefile。

h264_util.h封装了x264，main.cpp提供了两个测试例子，一个仅使用h264_util.h演示了如何进行视频编码，另一个测试例子说明了如何生成mp4。test_h264_utilities是在内存中快速生成rgb888的raw image的工具，仅仅是为了我测试方便用，你完全可以不使用它。

使用方法很简单（仅限于linux，windows下懒得写了）：安装libx264、libmp4av、libmp4v2（./configure;make;make install总会吧？mpeg4ip安装完整的软件包也许会有问题，但是只安装库是没问题的：先在源代码包的根目录下./configure，然后在对应的库代码的目录里make;make install），把下述三个文件拷贝到同一个目录里，然后make clean;make;./test_main，就会生成一个叫7.mp4的文件（文件名写在main.cpp的MP4Create调用里）。

//h264_util.h

#ifndef __H264_UTIL_H__
#define __H264_UTIL_H__

// Preamble of h264_util.h: standard headers, fixed-width integer types and
// the x264 C API.  The include guard opened here is closed by the #endif at
// the very end of the header.

#include <cstdio>
#include <cassert>
#include <climits>
#include <cstdlib>
#include <cstring>   // memcpy, used by h264_utilities::compress

#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

// x264.h uses int8_t..uint64_t but does not declare them itself.
//  - Prefer the standard headers whenever the build says they exist, and on
//    every non-Windows platform (hand-written typedefs there can disagree
//    with the system's own, e.g. int64_t is 'long' on LP64 glibc).
//  - Only old Windows toolchains get the hand-written fallback, and only if
//    mpeg4ip's Windows header (which declares the same names) has not been
//    included first.
#if (defined HAVE_INTTYPES_H)
#include <inttypes.h>
#elif (defined HAVE_STDINT_H) || !((defined WIN32)||(defined _WIN32))
#include <stdint.h>
#elif !(defined __MPEG4IP_WIN32_H__)
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
#endif

#include "x264.h"

#ifdef __cplusplus
}
#endif

// Default container policy for h264_utilities: accepts every call and does
// nothing, so the encoder can be exercised without any mp4 library.
struct ContainerTraits{
    // Invoked by h264_utilities::compress() once per encoded NAL unit.
    // The return value is added to the encoder's write offset; this default
    // always answers 0, i.e. "keep nothing".
    static int write_nalu(unsigned char *nalu, int size)
    {
        (void)nalu;
        (void)size;
        return 0;
    }

    // Invoked by h264_utilities::compress() after the NAL units of one frame
    // have been offered to write_nalu(); bit_buffer/size describe the bytes
    // that were kept.  The default ignores them.
    static void set_eop(unsigned char* bit_buffer, int size)
    {
        (void)bit_buffer;
        (void)size;
    }
};

template <typename CTraits=ContainerTraits>

class h264_utilities

{

public:

h264_utilities(int width, int height):_h(0),_bit_buffer(0),_bit_buffer_capacity(1024*256)

{

assert(width%16==0);

assert(height%16==0);

x264_param_default(&_param);

_param.i_width = width;

_param.i_height= height;

_h=x264_encoder_open(&_param);

assert(_h);

//http://www.via.ecp.fr/via/ml/vlc-devel/2005-02/msg00371.html or google "x264_encoder_encode"

//if we do not allocate the new buffer ,

//the code "RGB_TO_I420( rgb_to_i420, 0, 1, 2, 3 );" will raise segment fault

x264_picture_alloc(&_pic,X264_CSP_RGB,_param.i_width ,_param.i_height);

// Do not force any parameters

_pic.i_type = X264_TYPE_AUTO;

_pic.i_qpplus1 = 0;

_bit_buffer_capacity=(_bit_buffer_capacity>_param.i_width*_param.i_height*4? _bit_buffer_capacity:_param.i_width*_param.i_height*4);

if (!_bit_buffer)

_bit_buffer = (unsigned char*)av_malloc(_bit_buffer_capacity);

}

~h264_utilities()

{

//assert(0);

if(_bit_buffer)

{

av_free(_bit_buffer);

_bit_buffer=0;

}

x264_encoder_close(_h);

}

//only accept row images (rgb888)

bool compress(unsigned char* frame)

{

memcpy(reinterpret_cast<void*>(_pic.img.plane[0]), reinterpret_cast<void*>(frame),_param.i_width*_param.i_height*3);

if(x264_encoder_encode(_h,&_nals,&_nnal,&_pic,&_pic_out)<0)

{

fprintf(stderr, "x264_encoder_encode failed\n" );

assert(0);

return false;

}

assert(_nnal);

int inserter=0;

for(int b = 0; b < _nnal; b++ )

{

int size;

if( ( size = x264_nal_encode( _bit_buffer+inserter, &_bit_buffer_capacity, 1, _nals+b ) ) < 0 )

{

fprintf( stderr, "need to increase buffer size (size=%d)\n", -size );

assert(0);

return false;

}

//nal unit start from

inserter+=CTraits::write_nalu(_bit_buffer+inserter,size);

}

if(_nnal)

CTraits::set_eop(_bit_buffer,inserter);

return true;

}

public:

x264_picture_t _pic;

x264_picture_t _pic_out;

x264_param_t _param;

x264_nal_t *_nals;

int _nnal;

x264_t *_h;

unsigned char * _bit_buffer;

int _bit_buffer_capacity;

public:

static void *av_malloc(unsigned int size)

{

static void *ptr;

#ifdef MEMALIGN_HACK

int diff;

#endif

/* lets disallow possible ambiguous cases */

if(size > INT_MAX)

return 0;

#ifdef MEMALIGN_HACK

ptr = malloc(size+16+1);

diff= ((-(int)ptr - 1)&15) + 1;

ptr += diff;

((char*)ptr)[-1]= diff;

#elif defined (HAVE_MEMALIGN)

ptr = memalign(16,size);

// Why 64? Indeed, we should align it:

// on 4 for 386

// on 16 for 486

// on 32 for 586, PPro - k6-III

// on 64 for K7 (maybe for P3 too).

// Because L1 and L2 caches are aligned on those values.

// But I don't want to code such logic here!

/* Why 16?

because some cpus need alignment, for example SSE2 on P4, & most RISC cpus

it will just trigger an exception and the unaligned load will be done in the

exception handler or it will just segfault (SSE2 on P4)

Why not larger? because i didnt see a difference in benchmarks ...

/* benchmarks with p3

memalign(64)+1 3071,3051,3032

memalign(64)+2 3051,3032,3041

memalign(64)+4 2911,2896,2915

memalign(64)+8 2545,2554,2550

memalign(64)+16 2543,2572,2563

memalign(64)+32 2546,2545,2571

memalign(64)+64 2570,2533,2558

btw, malloc seems to do 8 byte alignment by default here

#else

ptr = malloc(size);

#endif

return ptr;

}

/**

* av_realloc semantics (same as glibc): if ptr is 0 and size > 0,

* identical to malloc(size). If size is zero, it is identical to

* free(ptr) and 0 is returned.

static void *av_realloc(void *ptr, unsigned int size)

{

#ifdef MEMALIGN_HACK

int diff;

#endif

/* lets disallow possible ambiguous cases */

if(size > INT_MAX)

return 0;

#ifdef MEMALIGN_HACK

//FIXME this isnt aligned correctly though it probably isnt needed

if(!ptr) return av_malloc(size);

diff= ((char*)ptr)[-1];

return realloc(ptr - diff, size + diff) + diff;

#else

return realloc(ptr, size);

#endif

}

/* NOTE: ptr = 0 is explicetly allowed */

static void av_free(void *ptr)

{

/* XXX: this test should not be needed on most libcs */

if (ptr)

#ifdef MEMALIGN_HACK

free(ptr - ((char*)ptr)[-1]);

#else

free(ptr);

#endif

}

public:

void debug_dump(void)

{

//debug parameters

fprintf(stderr,"dump parameters now:\n");

fprintf(stderr,"cpu=%d\n",_param.cpu);

fprintf(stderr,"i_threads=%d\n",_param.i_threads);

fprintf(stderr,"i_width=%d\n",_param.i_width);

fprintf(stderr,"i_height=%d\n",_param.i_height);

fprintf(stderr,"i_csp=%d\n",_param.i_csp);

fprintf(stderr,"i_level_idc=%d\n",_param.i_level_idc);

fprintf(stderr,"vui.i_sar_height=%d\n",_param.vui.i_sar_height);

fprintf(stderr,"vui.i_sar_width=%d\n",_param.vui.i_sar_width);

fprintf(stderr,"i_fps_num=%d\n",_param.i_fps_num);

fprintf(stderr,"i_fps_den=%d\n",_param.i_fps_den);

fprintf(stderr,"i_frame_reference=%d\n",_param.i_frame_reference);

fprintf(stderr,"i_keyint_max=%d\n",_param.i_keyint_max);

fprintf(stderr,"i_keyint_min=%d\n",_param.i_keyint_min);

fprintf(stderr,"i_scenecut_threshold=%d\n",_param.i_scenecut_threshold);

fprintf(stderr,"i_bframe=%d\n",_param.i_bframe);

fprintf(stderr,"psz_cqm_file=%s\n",_param.psz_cqm_file);

}

};

//prepare row images rgb888

/**
 * Test helper: builds FRAMES packed RGB888 images of WIDTH x HEIGHT pixels
 * in one contiguous buffer and prints them to stdout.
 *
 * Every image is black except for one quadrant whose red channel is 255;
 * the quadrant moves with the frame number (see produce_images()).
 * The object owns 'buf' and must not be copied.
 */
class test_h264_utilities{
public:
    /**
     * @param w image width in pixels
     * @param h image height in pixels
     * @param f number of images
     */
    test_h264_utilities(int w,int h,int f):WIDTH(w),HEIGHT(h),FRAMES(f),buf(0)
    {
        // calloc: pixels that produce_images() does not touch must read as
        // black instead of uninitialised memory
        const size_t frame_bytes = (size_t)HEIGHT*WIDTH*3;
        buf=(unsigned char*)calloc(frame_bytes, (size_t)FRAMES);
        assert(buf || frame_bytes==0 || FRAMES==0);
        if(!buf)
            return;

        produce_images();
        dump_images();
    }

    ~test_h264_utilities()
    {
        free(buf);
    }

    /** @return pointer to the first byte of image j (0-based, not range checked). */
    unsigned char* frame(int j)
    {
        return buf+(size_t)j*HEIGHT*WIDTH*3;
    }

private:
    /**
     * Paints the red quadrant of every image.
     * (* is red pixel, - is black pixel):
     *   frame1 frame2 frame3 frame4
     *   **--   --**   ----   ----
     *   ----   ----   --**   **--
     * The pattern repeats every four frames.
     */
    void produce_images()
    {
        for(int j=0;j<FRAMES;j++)
        {
            unsigned char* img=frame(j);
            for(int y=0;y<HEIGHT;y++)
            {
                const bool top = y<=HEIGHT/2;
                for(int x=0;x<WIDTH;x++)
                {
                    const bool left = x<=WIDTH/2;
                    bool red=false;
                    switch(j%4)
                    {
                    case 0: red =  left &&  top; break;
                    case 1: red = !left &&  top; break;
                    case 2: red = !left && !top; break;
                    case 3: red =  left && !top; break;
                    default:
                        assert(0);
                    }
                    if(red)
                    {
                        //red channel of pixel (x,y)
                        img[y*WIDTH*3+x*3]=255;
                    }
                }
            }
        }
    }

    /** Prints every image to stdout: one text line per row, 1 = red, 0 = other. */
    void dump_images()
    {
        for(int j=0;j<FRAMES;j++)
        {
            //debug, before writing the real file, let's dump it on screen
            fprintf(stdout,"image %d:\n",j);
            for(int y=0;y<HEIGHT;y++)
            {
                const unsigned char* row=frame(j)+y*WIDTH*3;
                for(int x=0;x<WIDTH;x++)
                {
                    fprintf(stdout,"%d",row[x*3]==255 ? 1 : 0);
                }
                fprintf(stdout,"\n");
            }
        }
    }

public:
    int WIDTH;           // image width in pixels
    int HEIGHT;          // image height in pixels
    int FRAMES;          // number of images stored in buf
    unsigned char* buf;  // FRAMES consecutive images, HEIGHT*WIDTH*3 bytes each
};

#endif //__H264_UTIL_H__

//main.cpp

#include <assert.h>

#include <stdio.h>

#include <string.h>

#if 1

#include "mp4.h"

#include "mp4av_h264.h" //not included in mp4.h

//--------------missing declaration -------begin

//not in mp4av_h264.h or mp4av.h :(

// Bookkeeping used by the Dpb* helpers of MP4Container to derive, for every
// frame in arrival order, its position in output (lowest-POC-first) order.
typedef struct

{

// entries that have been added by DpbAdd and not yet removed by DpbUpdate
struct

{

// largest slot position at which DpbUpdate has found the lowest-POC entry
// so far; DpbFrameOffset adds it to every offset it returns
int size_min;

// arrival index that DpbAdd gives to the next entry
int next;

// number of occupied slots in idx[] / poc[]
int cnt;

// arrival index of the entry in each slot
int idx[17];

// picture order count of the entry in each slot
int poc[17];

} dpb;

// number of entries removed by DpbUpdate so far (next output position)
int cnt;

// number of elements allocated for 'frame'
int cnt_max;

// heap array indexed by arrival index, holding the output position assigned
// by DpbUpdate, or -1 where none has been assigned yet
int *frame;

} h264_dpb_t;

//--------------missing declaration -------end

struct MP4Container

{

//interface

static int write_nalu(unsigned char* nalu, int size)

{

int32_t poc = 0;

unsigned char type=nalu[4] & 0x1f;

samplesWritten++;

switch(type)

{

//sps

case 0x7:

//ISO 14496-10 (Video), Advanced Video Coding (AVC), also known as H.264

memset(&h264_dec, 0, sizeof(h264_dec));

if (h264_read_seq_info(nalu, size, &h264_dec) == -1)

{

assert(0);

fprintf(stderr, "Could not decode Sequence header\n");

return 0;

}

trackId=MP4AddH264VideoTrack(mp4File,

Mp4TimeScale, //time scale,

mp4FrameDuration(),

h264_dec.pic_width,

h264_dec.pic_height,

nalu[5], //profile (baseline profile, main profile, etc. see

nalu[6], //compatible profile

nalu[7], //levels

3);

// doesn't get added to sample buffer

// remove header

MP4AddH264SequenceParameterSet(mp4File, trackId,

nalu + 4,

size - 4);

return 0;

break;

//pps

case 0x8:

// doesn't get added to sample buffer

MP4AddH264PictureParameterSet(mp4File, trackId,

nalu + 4,

size - 4);

return 0;

break;

//filler data

case 0xc:

// doesn't get copied

break;

//access unit

//sei

case 0x6:

case 0x9:

// note - may not want to copy this - not needed

default:

if(h264_nal_unit_type_is_slice(type))

{

//slice type

slice_is_idr = h264_dec.nal_unit_type == 0x5; //type idr slice

poc = h264_dec.pic_order_cnt;

nal_is_sync = h264_slice_is_idr(&h264_dec);

}

nalu[0] = ((size-4) >> 24) & 0xff;

nalu[1] = ((size-4) >> 16) & 0xff;

nalu[2] = ((size-4) >> 8) & 0xff;

nalu[3] = ((size-4) >> 0) & 0xff;

return size;

}//end of switch

return 0;

}

static void set_eop(unsigned char* bit_buffer, int size)

{

samplesWritten++;

double thiscalc;

thiscalc = samplesWritten;

thiscalc *= Mp4TimeScale;

thiscalc /= VideoFrameRate;

thisTime = (MP4Duration)thiscalc;

MP4Duration dur;

dur = thisTime - lastTime;

if(!MP4WriteSample(mp4File,trackId,bit_buffer,size,dur,0,nal_is_sync))

{

fprintf(stderr,"can't write video frame\n");

MP4DeleteTrack(mp4File, trackId);

return;

}

lastTime = thisTime;

DpbAdd(&h264_dpb, h264_dec.pic_order_cnt, slice_is_idr);

return;

DpbFlush(&h264_dpb);

if (h264_dpb.dpb.size_min > 0)

{

unsigned int ix;

for (ix = 0; ix < samplesWritten; ix++)

{;

const int offset = DpbFrameOffset(&h264_dpb, ix);

//fprintf( stderr, "dts=%d pts=%d offset=%d\n", ix, ix+offset, offset );

MP4SetSampleRenderingOffset(mp4File, trackId, 1 + ix, offset*mp4FrameDuration());

}

DpbClean(&h264_dpb);

}

//not interface, can be deleled safely

static MP4FileHandle mp4File;

//consts

static uint32_t Mp4TimeScale;

static double VideoFrameRate;

static uint32_t mp4FrameDuration()

{

return (u_int32_t)(((double)Mp4TimeScale) / VideoFrameRate);

}

static MP4SampleId samplesWritten;

static uint8_t *nal_buffer;

static uint32_t nal_buffer_size;

static uint32_t nal_buffer_size_max;

static MP4Timestamp lastTime;

static MP4Timestamp thisTime;

static MP4TrackId trackId;

static bool nal_is_sync;

static h264_dpb_t h264_dpb;

static h264_decode_t h264_dec;

static bool slice_is_idr;

//some tools

static void DpbInit( h264_dpb_t *p )

{

p->dpb.cnt = 0;

p->dpb.next = 0;

p->dpb.size_min = 0;

p->cnt = 0;

p->cnt_max = 0;

p->frame = NULL;

}

static void DpbClean( h264_dpb_t *p )

{

free( p->frame );

}

static void DpbUpdate( h264_dpb_t *p, int is_forced )

{

int i;

int pos;

if (!is_forced && p->dpb.cnt < 16)

return;

/* find the lowest poc */

pos = 0;

for (i = 1; i < p->dpb.cnt; i++)

{

if (p->dpb.poc[i] < p->dpb.poc[pos])

pos = i;

}

//fprintf( stderr, "lowest=%d\n", pos );

/* save the idx */

if (p->dpb.idx[pos] >= p->cnt_max)

{

int inc = 1000 + (p->dpb.idx[pos]-p->cnt_max);

p->cnt_max += inc;

p->frame = (int*)realloc( p->frame, sizeof(int)*p->cnt_max );

for (i=0;i<inc;i++)

p->frame[p->cnt_max-inc+i] = -1; /* To detect errors latter */

}

p->frame[p->dpb.idx[pos]] = p->cnt++;

/* Update the dpb minimal size */

if (pos > p->dpb.size_min)

p->dpb.size_min = pos;

/* update dpb */

for (i = pos; i < p->dpb.cnt-1; i++)

{

p->dpb.idx[i] = p->dpb.idx[i+1];

p->dpb.poc[i] = p->dpb.poc[i+1];

}

p->dpb.cnt--;

}

static void DpbFlush( h264_dpb_t *p )

{

while (p->dpb.cnt > 0)

DpbUpdate( p, true );

}

static void DpbAdd( h264_dpb_t *p, int poc, int is_idr )

{

if (is_idr)

DpbFlush( p );

p->dpb.idx[p->dpb.cnt] = p->dpb.next;

p->dpb.poc[p->dpb.cnt] = poc;

p->dpb.cnt++;

p->dpb.next++;

DpbUpdate( p, false );

}

static int DpbFrameOffset( h264_dpb_t *p, int idx )

{

if (idx >= p->cnt)

return 0;

if (p->frame[idx] < 0)

return p->dpb.size_min; /* We have an error (probably broken/truncated bitstream) */

return p->dpb.size_min + p->frame[idx] - idx;

}

};

MP4FileHandle MP4Container::mp4File=0;

uint32_t MP4Container::Mp4TimeScale=90000;

//http://www.via.ecp.fr/via/ml/x264-devel/2005-07/msg00056.html

//I think we need to get the latest x264 library to test it

double MP4Container::VideoFrameRate=25*2;

MP4SampleId MP4Container::samplesWritten=0;

uint8_t* MP4Container::nal_buffer=0;

uint32_t MP4Container::nal_buffer_size=0;

uint32_t MP4Container::nal_buffer_size_max=0;

MP4Timestamp MP4Container::lastTime;

MP4Timestamp MP4Container::thisTime;

MP4TrackId MP4Container::trackId;

bool MP4Container::nal_is_sync=false;

h264_dpb_t MP4Container::h264_dpb;

h264_decode_t MP4Container::h264_dec;

bool MP4Container::slice_is_idr;

#endif

//h264_util.h must be located after the mpeg4ip's headers, because those headers typedef its own int_8

#include "h264_util.h"

int main(void)

{

#if 1

{

//width, heigth, frames

int width=192;

int height=128;

int frames=25;

test_h264_utilities images(width,height,frames);

MP4FileHandle mp4File=MP4Create("7.mp4",1,0);

if (mp4File)

{

MP4SetTimeScale(mp4File, MP4Container::Mp4TimeScale);

}

MP4Container::mp4File=mp4File;

h264_utilities<MP4Container> h264(width,height);

for(int i=0;i<images.FRAMES;i++)

{

h264.compress(images.frame(i));

}

MP4Close(mp4File);

}

#endif //testcase1

#if 1

{

//width, height,frames

test_h264_utilities images(16,16,4);

h264_utilities<> h264(16,16);

for(int i=0;i<images.FRAMES;i++)

{

h264.compress(images.frame(i));

}

#endif //testcase2, test x264 (encapsulateed in a h264_util.h)

return 0;

}

//GNUmakefile

# Builds ./test_main from every C/C++ source in this directory (Linux only).

#to get release version , leave $(IDL_DEBUG) empty
IDL_DEBUG=hi

DEVELOP_ROOT := $(shell echo `cd ../../;pwd`)

#---------------------for x264 / mpeg4ip------------------begin
# Uncomment the right-hand sides to use an x264 tree under $(DEVELOP_ROOT)
# instead of the system-wide installation.
LIBX264=#$(DEVELOP_ROOT)/x264
INC_DIRS :=#$(LIBX264)/include
LIBS =mp4v2 mp4av x264
LIB_DIR =
STATIC_LIBS =#$(LIBX264)/lib/libx264.la
SHARED_LIBS =
SHARED_LIB_DIR =
#---------------------for x264 / mpeg4ip------------------end

ifdef IDL_DEBUG
OPT =-D_DEBUG -g -O0 -Wall
else
OPT =-DNDEBUG -O2 -Wall
endif

CC =gcc
CXX =g++

CPPFLAGS +=$(OPT) $(addprefix -I,$(INC_DIRS))

ifdef SHARED_LIB_DIR
LDFLAGS +=-Wl,-rpath,$(SHARED_LIB_DIR) -L$(SHARED_LIB_DIR)
endif
LDFLAGS +=$(addprefix -L,$(LIB_DIR))

# -l options go after the object files on the link line: linkers resolve
# symbols left to right
LDLIBS +=$(addprefix -l,$(SHARED_LIBS)) $(addprefix -l,$(LIBS))

SOURCES :=$(wildcard *.c) $(wildcard *.cc) $(wildcard *.cxx) $(wildcard *.cpp)
OBJS = $(addsuffix .o, $(basename $(SOURCES)))

.PHONY : all clean mkdir targets

all : targets

# create the deployment directory if it does not yet exist
DEPLOYDIR = .

mkdir :
	@mkdir -p $(DEPLOYDIR)

TARGET = $(DEPLOYDIR)/test_main

# libtool is used so that a .la archive in STATIC_LIBS can be linked
$(TARGET) : $(OBJS)
	libtool --tag=CXX --mode=link $(CXX) $(LDFLAGS) $^ $(STATIC_LIBS) $(LDLIBS) -o $@

targets : mkdir $(TARGET)

clean :
	-$(RM) $(OBJS) $(OBJS:.o=.d) core* *.bak $(TARGET) *.mov *.mp4 *.d *.mpg *.mpeg

# common block
# add auto dependency generation to default rule
COMPILE.c += -MMD
COMPILE.cc += -MMD

DFILES := $(OBJS:.o=.d)

ifneq (${MAKECMDGOALS}, clean)
-include $(DFILES)
endif
# end of common block