现在的位置: 首页 > 综合 > 正文

帧内预测代码分析

2014年02月18日 ⁄ 综合 ⁄ 共 7362字 ⁄ 字号 评论关闭

// 

http://blog.chinaunix.net/uid-22763991-id-1769544.html

 

//x264 帧内预测模式选择解析(函数 x264_mb_analyse_intra)


/*
 * x264_mb_analyse_intra: intra mode decision for the current macroblock.
 *
 * Estimates the cost of coding the macroblock as I16x16, then (when enabled in
 * the analysis flags) as four I8x8 blocks and as sixteen I4x4 blocks, and
 * records the per-partition best prediction modes and costs in *a.  The
 * function does not pick the final macroblock type itself; it only fills in
 * the costs.
 *
 * Overview (translated from the original annotation):
 *   1. 16x16 prediction
 *      (1) determine which modes are possible from the neighbouring
 *          macroblocks (mainly TOP and LEFT);
 *      (2) compute the cost of every possible mode;
 *      (3) keep the minimum.
 *   2. 4x4 prediction
 *      (1) determine which modes are possible from the neighbouring blocks;
 *      (2) compute the cost of every mode for each 4x4 block and keep the
 *          minimum;
 *      (3) sum the sixteen minima to get the total 4x4 cost.
 *   3. The 16x16 cost and the summed 4x4 cost are compared and the smaller one
 *      determines the intra coding mode of the macroblock.
 *   The code below additionally evaluates 8x8 prediction between steps 1 and 2
 *   when X264_ANALYSE_I8x8 is set.
 *
 * Parameters:
 *   h            encoder context; supplies source (p_fenc[0]) and
 *                reconstruction (p_fdec[0]) luma pointers, neighbour
 *                availability and the prediction / compare function tables.
 *                The reconstruction buffer, h->mb.i_cbp_luma and the intra
 *                prediction-mode caches are modified as a side effect.
 *   a            analysis state.  Read here: i_lambda, i_qp, i_mbrd,
 *                b_fast_intra.  Written here: i_satd_i16x16, i_predict16x16,
 *                i_satd_i16x16_dir[], i_satd_i8x8, i_predict8x8[],
 *                i_satd_i8x8_dir[][], i_satd_i4x4, i_predict4x4[].
 *                NOTE(review): a->i_satd_i16x16 is only updated through
 *                COPY2_IF_LT, and i_satd_i8x8 is read in the 4x4 section even
 *                when the 8x8 section is skipped, so both are presumably
 *                pre-initialised (e.g. to COST_MAX) by the caller - confirm.
 *   i_satd_inter cost to compare against (presumably the best inter cost found
 *                so far); used only for the early-termination thresholds.
 *
 * Returns nothing.  The function may return early, before the 8x8 and/or 4x4
 * sections, when the intra cost is already much worse than i_satd_inter.
 */

static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
{
    /* I slices use the "intra" analysis flag set, every other slice type the "inter" one. */
    const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
    uint8_t *p_src = h->mb.pic.p_fenc[0]; /* luma plane of the block being encoded */
    uint8_t *p_dst = h->mb.pic.p_fdec[0]; /* luma plane the predictions are written into */

    int i, idx;
    int i_max;           /* number of valid entries in predict_mode[] */
    int predict_mode[9]; /* candidate prediction modes for the current block */
    int b_merged_satd = !!h->pixf.intra_mbcmp_x3_16x16 && !h->mb.b_lossless;// a combined "x3" compare routine exists and the MB is not lossless


    /*---------------- Try all mode and calculate their score ---------------*/

    /* 16x16 prediction selection */
    predict_16x16_mode_available( h->mb.i_neighbour, predict_mode, &i_max );// fill predict_mode[]/i_max with the 16x16 modes usable with these neighbours (helper defined elsewhere)


    if( b_merged_satd && i_max == 4 ) // all four modes (V, H, DC, Plane) are available and the merged routine can be used

    {
        h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );// one call writes the costs into i_satd_i16x16_dir (V, H and DC according to the original annotation)

        h->predict_16x16[I_PRED_16x16_P]( p_dst );// Plane prediction is done separately

        a->i_satd_i16x16_dir[I_PRED_16x16_P] =
            h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
        for( i=0; i<4; i++ )
        {
            int cost = a->i_satd_i16x16_dir[i] += a->i_lambda * bs_size_ue(i);// add the lambda-weighted mode cost (bs_size_ue is presumably the Exp-Golomb code length) and store it back

            COPY2_IF_LT( a->i_satd_i16x16, cost, a->i_predict16x16, i );// remember the mode with the smallest cost

            // Per the original annotation: if cost is smaller than a->i_satd_i16x16, it is stored
            // as the current best cost and i as the current best mode;

            // otherwise nothing is assigned, only the comparison is made.
            // (COPY2_IF_LT itself is defined elsewhere.)


        }
    }
    else // generic path: same idea, used when only some modes are available or the merged routine cannot be used

    {
        for( i = 0; i < i_max; i++ )
        {
            int i_satd;
            int i_mode = predict_mode[i];

            if( h->mb.b_lossless )
                x264_predict_lossless_16x16( h, i_mode );// lossless prediction

            else
                h->predict_16x16[i_mode]( p_dst );

            /* distortion of this prediction plus the lambda-weighted cost of the (remapped) mode number */
            i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
                    a->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
            COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
            a->i_satd_i16x16_dir[i_mode] = i_satd;
        }
    }

    if( h->sh.i_type == SLICE_TYPE_B )
        /* cavlc mb type prefix */
        a->i_satd_i16x16 += a->i_lambda * i_mb_b_cost_table[I_16x16];
    /* Fast-intra early out: skip 8x8/4x4 when the 16x16 cost is more than twice the inter cost. */
    if( a->b_fast_intra && a->i_satd_i16x16 > 2*i_satd_inter )
        return;

    /* 8x8 prediction selection */
    if( flags & X264_ANALYSE_I8x8 )// original annotator's note: this is usually left disabled

    {
        DECLARE_ALIGNED_16( uint8_t edge[33] ); /* neighbour samples prepared by predict_8x8_filter */
        /* Use the sa8d metric when mbcmp is the SATD family, otherwise the configured 8x8 mbcmp. */
        x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8];
        /* No early termination when i_mbrd is set; otherwise stop once worse than inter or I16x16. */
        int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
        int i_cost = 0;
        h->mb.i_cbp_luma = 0;
        b_merged_satd = h->pixf.intra_sa8d_x3_8x8 && h->pixf.mbcmp[0] == h->pixf.satd[0];

        // FIXME some bias like in i4x4?

        if( h->sh.i_type == SLICE_TYPE_B )
            i_cost += a->i_lambda * i_mb_b_cost_table[I_8x8];

        /* Loop over the four 8x8 blocks; exits through the break below. */
        for( idx = 0;; idx++ )
        {
            int x = idx&1;
            int y = idx>>1;
            uint8_t *p_src_by = p_src + 8*x + 8*y*FENC_STRIDE;
            uint8_t *p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE;
            int i_best = COST_MAX;
            /* Mode predicted from the neighbours; 4*idx is presumably the first 4x4 block of this 8x8 block. */
            int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );

            /* The 4x4 availability helper is reused with the 8x8 neighbour flags. */
            predict_4x4_mode_available( h->mb.i_neighbour8[idx], predict_mode, &i_max );
            h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );

            if( b_merged_satd && i_max == 9 )
            {
                /* Merged path: modes 0..2 are costed by a single call.  Only satd[0..2]
                 * are read below.  The 3*lambda discount on the predicted mode combined
                 * with the +4*lambda gives the same 1-vs-4 lambda weighting as the
                 * generic loop further down. */
                int satd[9];
                h->pixf.intra_sa8d_x3_8x8( p_src_by, edge, satd );
                satd[i_pred_mode] -= 3 * a->i_lambda;
                for( i=2; i>=0; i-- )
                {
                    int cost = a->i_satd_i8x8_dir[i][idx] = satd[i] + 4 * a->i_lambda;
                    COPY2_IF_LT( i_best, cost, a->i_predict8x8[idx], i );
                }
                i = 3; /* continue the generic loop with the remaining candidates */
            }
            else
                i = 0;

            for( ; i<i_max; i++ )
            {
                int i_satd;
                int i_mode = predict_mode[i];

                if( h->mb.b_lossless )
                    x264_predict_lossless_8x8( h, p_dst_by, idx, i_mode, edge );
                else
                    h->predict_8x8[i_mode]( p_dst_by, edge );

                /* Distortion plus mode cost: 1*lambda if the mode equals the predicted one, else 4*lambda. */
                i_satd = sa8d( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE )
                       + a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4);

                COPY2_IF_LT( i_best, i_satd, a->i_predict8x8[idx], i_mode );
                a->i_satd_i8x8_dir[i_mode][idx] = i_satd;
            }
            i_cost += i_best;

            /* Stop after the last block, or as soon as the running cost exceeds the threshold. */
            if( idx == 3 || i_cost > i_satd_thresh )
                break;

            /* we need to encode this block now (for next ones) */
            h->predict_8x8[a->i_predict8x8[idx]]( p_dst_by, edge );
            x264_mb_encode_i8x8( h, idx, a->i_qp );

            x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[idx] );
        }

        if( idx == 3 )
        {
            /* All four blocks were costed (this branch is also taken when the
             * threshold was exceeded on the last block). */
            a->i_satd_i8x8 = i_cost;
            if( h->mb.i_skip_intra )
            {
                /* Save the reconstructed pixels, non-zero counts and luma CBP of the
                 * I8x8 trial into the i8x8_* buffers (presumably so that the encode
                 * can be reused later - confirm against the consumer of these buffers). */
                h->mc.copy[PIXEL_16x16]( h->mb.pic.i8x8_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
                h->mb.pic.i8x8_nnz_buf[0] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]];
                h->mb.pic.i8x8_nnz_buf[1] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]];
                h->mb.pic.i8x8_nnz_buf[2] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]];
                h->mb.pic.i8x8_nnz_buf[3] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]];
                h->mb.pic.i8x8_cbp = h->mb.i_cbp_luma;
                if( h->mb.i_skip_intra == 2 )
                    h->mc.memcpy_aligned( h->mb.pic.i8x8_dct_buf, h->dct.luma8x8, sizeof(h->mb.pic.i8x8_dct_buf) );
            }
        }
        else
        {
            /* Terminated early after idx+1 blocks: mark I8x8 as unusable and scale the
             * partial cost up to a whole-macroblock estimate.  The table divided by 256
             * is 4, 2 and ~4/3 for idx = 0, 1, 2, i.e. 4/(idx+1). */
            static const uint16_t cost_div_fix8[3] = {1024,512,341};
            a->i_satd_i8x8 = COST_MAX;
            i_cost = (i_cost * cost_div_fix8[idx]) >> 8;
        }
        /* Skip the 4x4 search when both the (estimated) 8x8 cost and the 16x16 cost exceed
         * 5/4 of the inter cost (6/4 when i_mbrd is non-zero). */
        if( X264_MIN(i_cost, a->i_satd_i16x16) > i_satd_inter*(5+!!a->i_mbrd)/4 )
            return;
    }

    /* 4x4 prediction selection */
    if( flags & X264_ANALYSE_I4x4 )// analyse the prediction modes of the 4x4 blocks

    {
        int i_cost;
        int i_satd_thresh = X264_MIN3( i_satd_inter, a->i_satd_i16x16, a->i_satd_i8x8 );// early-termination threshold: stop once the 4x4 cost exceeds the best of inter / I16x16 / I8x8

        h->mb.i_cbp_luma = 0;
        b_merged_satd = h->pixf.intra_satd_x3_4x4 && h->pixf.mbcmp[0] == h->pixf.satd[0];// merged path is usable when the x3 4x4 routine exists and mbcmp is the SATD family

        /* With i_mbrd set the threshold is relaxed to 10/8 of its value (9/8 with fast intra). */
        if( a->i_mbrd )
            i_satd_thresh = i_satd_thresh * (10-a->b_fast_intra)/8;

        i_cost = a->i_lambda * 24; /* from JVT (SATD0) */
        
          // Translated original annotation: without RDO, the macroblock's total I4x4 cost is the
          // sum of the best costs of the sixteen 4x4 blocks plus 4 * 6 * lambda_mode.

                  // No 4x4 cost has been computed yet at this point; the 4 * 6 * lambda_mode
                  // term is simply added up front.

                  // With 4x4 partitioning the best prediction mode of every 4x4 block has to be
                  // coded and transmitted,

                  // which costs more bits than the 16x16 mode.

                  // To keep the comparison fair, 6 * lambda_mode is charged per 8x8 block,

                  // which adds up to 4 * 6 * lambda_mode.


        if( h->sh.i_type == SLICE_TYPE_B )// B slices also pay the macroblock-type cost from the table

            i_cost += a->i_lambda * i_mb_b_cost_table[I_4x4];

        /* Loop over the sixteen 4x4 blocks; exits through the break below. */
        for( idx = 0;; idx++ )
        {
            uint8_t *p_src_by = p_src + block_idx_xy_fenc[idx];
            uint8_t *p_dst_by = p_dst + block_idx_xy_fdec[idx];
            int i_best = COST_MAX;
            int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx ); /* mode predicted from the neighbours */

            predict_4x4_mode_available( h->mb.i_neighbour4[idx], predict_mode, &i_max );

            if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )// top is available but top-right is not: synthesise the top-right samples

                /* emulate missing topright samples */
                *(uint32_t*) &p_dst_by[4 - FDEC_STRIDE] = p_dst_by[3 - FDEC_STRIDE] * 0x01010101U;

            if( b_merged_satd && i_max >= 6 )
            {
                /* Merged path: modes 0..2 are costed by a single call; only satd[0..2]
                 * are read below.  Same 1-vs-4 lambda weighting as the generic loop. */
                int satd[9];
                h->pixf.intra_satd_x3_4x4( p_src_by, p_dst_by, satd );
                satd[i_pred_mode] -= 3 * a->i_lambda;
                for( i=2; i>=0; i-- )
                    COPY2_IF_LT( i_best, satd[i] + 4 * a->i_lambda,
                                 a->i_predict4x4[idx], i );
                i = 3; /* continue the generic loop with the remaining candidates */
            }
            else
                i = 0;

            for( ; i<i_max; i++ )
            {
                int i_satd;
                int i_mode = predict_mode[i];
                if( h->mb.b_lossless )
                    x264_predict_lossless_4x4( h, p_dst_by, idx, i_mode );
                else
                    h->predict_4x4[i_mode]( p_dst_by );

                i_satd = h->pixf.mbcmp[PIXEL_4x4]( p_dst_by, FDEC_STRIDE,
                                                   p_src_by, FENC_STRIDE )
                       + a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4);
                                // Translated original annotation: SAD (sum of absolute differences) is
                                // computed on the difference between the prediction and the source pixels.

                                // To compare the cost of the modes more accurately, H.264 encoders also
                                // apply a Hadamard transform to these differences,

                               // i.e. the differences (which are eventually transformed to the frequency
                               // domain for coding) are transformed before the absolute values are summed;

                               // the result is called the sum of absolute transformed differences (SATD).


                COPY2_IF_LT( i_best, i_satd, a->i_predict4x4[idx], i_mode );
            }
            i_cost += i_best;

            if( i_cost > i_satd_thresh || idx == 15 )// leave the loop when the threshold is exceeded or all sixteen 4x4 blocks are done

                break;

            /* we need to encode this block now (for next ones) */
            h->predict_4x4[a->i_predict4x4[idx]]( p_dst_by );
            x264_mb_encode_i4x4( h, idx, a->i_qp );// encode this block so that the following 4x4 blocks can predict from it


            h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[idx]; // store the chosen mode in the prediction-mode cache

        }
        if( idx == 15 )
        {
            a->i_satd_i4x4 = i_cost;// total I4x4 cost of the macroblock (also reached when the threshold was exceeded on the last block)

            if( h->mb.i_skip_intra )
            {
                /* Save the reconstructed pixels, non-zero counts and luma CBP of the
                 * I4x4 trial into the i4x4_* buffers; the DCT coefficients too when
                 * i_skip_intra == 2. */
                h->mc.copy[PIXEL_16x16]( h->mb.pic.i4x4_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
                h->mb.pic.i4x4_nnz_buf[0] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]];
                h->mb.pic.i4x4_nnz_buf[1] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]];
                h->mb.pic.i4x4_nnz_buf[2] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]];
                h->mb.pic.i4x4_nnz_buf[3] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]];
                h->mb.pic.i4x4_cbp = h->mb.i_cbp_luma;
                if( h->mb.i_skip_intra == 2 )
                    h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.luma4x4, sizeof(h->mb.pic.i4x4_dct_buf) );
            }
        }
        else
            a->i_satd_i4x4 = COST_MAX; /* terminated early: I4x4 is not a candidate */
    }
}

 

抱歉!评论已关闭.