/* Source: http://blog.chinaunix.net/uid-22763991-id-1769544.html */
//帧内预测模式选择解析 /*帧内宏块预测编码模式:分别计算16X16和16个4X4块的代价,取两者中最小代价为该宏块的编码模式。 1、进行16X16模式的预测 (1)根据周围宏块的情况判断其可能的预测模式。(主要是上块TOP和左块LEFT) (2)计算各种可能模式的编码代价 (3)取最小代价 2、进行4X4块模式的预测 (1)根据周围宏块情况判断其可能的预测模式。(可以参考其他相邻宏块) (2)计算每个4X4块的每种预测模式的编码代价,并取代价最小 (3)将16个4X4块的最小代价相加,得到总代价和。 3、将16X16模式的代价与4X4模式的代价和进行比较,取两者最小为最后的宏块预测编码模式。*/ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter ) { const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter; uint8_t *p_src = h->mb.pic.p_fenc[0]; uint8_t *p_dst = h->mb.pic.p_fdec[0]; int i, idx; int i_max; int predict_mode[9]; int b_merged_satd = !!h->pixf.intra_mbcmp_x3_16x16 && !h->mb.b_lossless;//混运算汇编 /*---------------- Try all mode and calculate their score ---------------*/ /* 16x16 prediction selection */ predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max );//判断16x16四种预测模式是否可用性 if( b_merged_satd && i_max == 4 ) // V,H,DC,Plan四种模式全可用 且用汇编计算 { h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );//V,H,DC汇编实现 h->predict_16x16[I_PRED_16x16_P]( p_dst );//Plan a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ); for( i=0; i<4; i++ ) { int cost = a->i_satd_i16x16_dir[i] += a->i_lambda * bs_size_ue(i);//计算cost COPY2_IF_LT( a->i_satd_i16x16, cost, a->i_predict16x16, i );//记录拥有最小cost的值的预测模式 //如果i_satd比a->i_satd_i16x16小,则把i_satd值赋给a->i_satd_i16x16,作为当前最优代价, //同时把该次预测模式作为当前最优模式;如果a->i_satd_i16x16比i_satd小,则不赋值,只作比较. 
} } else //同上 只有部分预测模式可用 { for( i = 0; i < i_max; i++ ) { int i_satd; int i_mode = predict_mode[i]; if( h->mb.b_lossless ) x264_predict_lossless_16x16( h, i_mode );//无损的预测 else h->predict_16x16[i_mode]( p_dst ); i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) + a->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] ); COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode ); a->i_satd_i16x16_dir[i_mode] = i_satd; } } if( h->sh.i_type == SLICE_TYPE_B ) /* cavlc mb type prefix */ a->i_satd_i16x16 += a->i_lambda * i_mb_b_cost_table[I_16x16]; if( a->b_fast_intra && a->i_satd_i16x16 > 2*i_satd_inter ) return; /* 8x8 prediction selection */ if( flags & X264_ANALYSE_I8x8 )//一般是关闭的大家都不用 { DECLARE_ALIGNED_16( uint8_t edge[33] ); x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8]; int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 ); int i_cost = 0; h->mb.i_cbp_luma = 0; b_merged_satd = h->pixf.intra_sa8d_x3_8x8 && h->pixf.mbcmp[0] == h->pixf.satd[0]; // FIXME some bias like in i4x4? 
if( h->sh.i_type == SLICE_TYPE_B ) i_cost += a->i_lambda * i_mb_b_cost_table[I_8x8]; for( idx = 0;; idx++ ) { int x = idx&1; int y = idx>>1; uint8_t *p_src_by = p_src + 8*x + 8*y*FENC_STRIDE; uint8_t *p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE; int i_best = COST_MAX; int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx ); predict_4x4_mode_available( h->mb.i_neighbour8[idx], predict_mode, &i_max ); h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS ); if( b_merged_satd && i_max == 9 ) { int satd[9]; h->pixf.intra_sa8d_x3_8x8( p_src_by, edge, satd ); satd[i_pred_mode] -= 3 * a->i_lambda; for( i=2; i>=0; i-- ) { int cost = a->i_satd_i8x8_dir[i][idx] = satd[i] + 4 * a->i_lambda; COPY2_IF_LT( i_best, cost, a->i_predict8x8[idx], i ); } i = 3; } else i = 0; for( ; i<i_max; i++ ) { int i_satd; int i_mode = predict_mode[i]; if( h->mb.b_lossless ) x264_predict_lossless_8x8( h, p_dst_by, idx, i_mode, edge ); else h->predict_8x8[i_mode]( p_dst_by, edge ); i_satd = sa8d( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE ) + a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 
1 : 4); COPY2_IF_LT( i_best, i_satd, a->i_predict8x8[idx], i_mode ); a->i_satd_i8x8_dir[i_mode][idx] = i_satd; } i_cost += i_best; if( idx == 3 || i_cost > i_satd_thresh ) break; /* we need to encode this block now (for next ones) */ h->predict_8x8[a->i_predict8x8[idx]]( p_dst_by, edge ); x264_mb_encode_i8x8( h, idx, a->i_qp ); x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[idx] ); } if( idx == 3 ) { a->i_satd_i8x8 = i_cost; if( h->mb.i_skip_intra ) { h->mc.copy[PIXEL_16x16]( h->mb.pic.i8x8_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 ); h->mb.pic.i8x8_nnz_buf[0] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]]; h->mb.pic.i8x8_nnz_buf[1] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]]; h->mb.pic.i8x8_nnz_buf[2] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]]; h->mb.pic.i8x8_nnz_buf[3] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]]; h->mb.pic.i8x8_cbp = h->mb.i_cbp_luma; if( h->mb.i_skip_intra == 2 ) h->mc.memcpy_aligned( h->mb.pic.i8x8_dct_buf, h->dct.luma8x8, sizeof(h->mb.pic.i8x8_dct_buf) ); } } else { static const uint16_t cost_div_fix8[3] = {1024,512,341}; a->i_satd_i8x8 = COST_MAX; i_cost = (i_cost * cost_div_fix8[idx]) >> 8; } if( X264_MIN(i_cost, a->i_satd_i16x16) > i_satd_inter*(5+!!a->i_mbrd)/4 ) return; } /* 4x4 prediction selection */ if( flags & X264_ANALYSE_I4x4 )//分析4x4宏块的预测模式 { int i_cost; int i_satd_thresh = X264_MIN3( i_satd_inter, a->i_satd_i16x16, a->i_satd_i8x8 );//设定一个阀值(若4x4的cost>16x16的cost则终止); h->mb.i_cbp_luma = 0; b_merged_satd = h->pixf.intra_satd_x3_4x4 && h->pixf.mbcmp[0] == h->pixf.satd[0];//赋值对应模式的函数指针 if( a->i_mbrd ) i_satd_thresh = i_satd_thresh * (10-a->b_fast_intra)/8; i_cost = a->i_lambda * 24; /* from JVT (SATD0) */ //非RDO率失真优化模式下,宏块总代价cost_intra4*4 = 16个 4*4 小块的最佳 cost 求和 + 4 * 6 * lambda_mode. //此处由于还未进行4X4代价计算,只是预先增加4 * 6 * lambda_mode. 
//当采用4X4分块时,由于每个4X4块的最优预测编码模式都需要进行编码传输, //这样,相比较于16X16模式就多了传输比特数, //为了合理公平比较,规定每个8*8块加一个6*lambda_mode, //因此就等于是加了一个 4 * 6 * lambda_mode if( h->sh.i_type == SLICE_TYPE_B )// i_cost += a->i_lambda * i_mb_b_cost_table[I_4x4]; for( idx = 0;; idx++ ) { uint8_t *p_src_by = p_src + block_idx_xy_fenc[idx]; uint8_t *p_dst_by = p_dst + block_idx_xy_fdec[idx]; int i_best = COST_MAX; int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx ); predict_4x4_mode_available( h->mb.i_neighbour4[idx], predict_mode, &i_max ); if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )// topright 不存在时模拟一个 /* emulate missing topright samples */ *(uint32_t*) &p_dst_by[4 - FDEC_STRIDE] = p_dst_by[3 - FDEC_STRIDE] * 0x01010101U; if( b_merged_satd && i_max >= 6 ) { int satd[9]; h->pixf.intra_satd_x3_4x4( p_src_by, p_dst_by, satd ); satd[i_pred_mode] -= 3 * a->i_lambda; for( i=2; i>=0; i-- ) COPY2_IF_LT( i_best, satd[i] + 4 * a->i_lambda, a->i_predict4x4[idx], i ); i = 3; } else i = 0; for( ; i<i_max; i++ ) { int i_satd; int i_mode = predict_mode[i]; if( h->mb.b_lossless ) x264_predict_lossless_4x4( h, p_dst_by, idx, i_mode ); else h->predict_4x4[i_mode]( p_dst_by ); i_satd = h->pixf.mbcmp[PIXEL_4x4]( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE ) + a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4); //SAD(绝对差值和)计算的差值是预测值与图像像素值的差值, //为了更准确的比较每种模式的Cost值,H.264还对这些差值进行Hadamard变换, //将差值(这些值最后要变换到频域进行编码)变换到频域求绝对差值和, //这样计算得到的值叫作绝对变换差和(SATD). 
COPY2_IF_LT( i_best, i_satd, a->i_predict4x4[idx], i_mode ); } i_cost += i_best; if( i_cost > i_satd_thresh || idx == 15 )//超过预定阀值,或者编码完16个4X4宏块则跳出 break; /* we need to encode this block now (for next ones) */ h->predict_4x4[a->i_predict4x4[idx]]( p_dst_by ); x264_mb_encode_i4x4( h, idx, a->i_qp );//编码这个块(量化)为下一个4x4的块做准备 h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[idx]; //备份一下数据 } if( idx == 15 ) { a->i_satd_i4x4 = i_cost;//最优的那个4x4预测模式 if( h->mb.i_skip_intra ) { h->mc.copy[PIXEL_16x16]( h->mb.pic.i4x4_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 ); h->mb.pic.i4x4_nnz_buf[0] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]]; h->mb.pic.i4x4_nnz_buf[1] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]]; h->mb.pic.i4x4_nnz_buf[2] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]]; h->mb.pic.i4x4_nnz_buf[3] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]]; h->mb.pic.i4x4_cbp = h->mb.i_cbp_luma; if( h->mb.i_skip_intra == 2 ) h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.luma4x4, sizeof(h->mb.pic.i4x4_dct_buf) ); } } else a->i_satd_i4x4 = COST_MAX; } }