Skip to content

Commit

Permalink
Key frame temporal filtering
Browse files Browse the repository at this point in the history
Added key frame temporal filtering. Enabled it for VOD encoding
with encoder speed < 2.
Minor improvement in prediction.
Added the restriction of using no more than "arnr_max_frames"
frames for temporal filtering.
Key frame temporal filtering is turned off by default for now. To
enable it, set "--enable-keyframe-filtering=1".

Borg result with "--enable-keyframe-filtering=1"
         avg_psnr:  ovr_psnr:   ssim:    vmaf:
hdres2:   -0.762     -0.863    -0.903   -0.680
midres2:  -0.813     -0.753    -0.757   -0.743
lowres2:  -0.492     -0.598    -0.737   -0.881
The impact on the encoder time is minimal.

Change-Id: If6abea3e21efcb96f1978cd9dfaa742c40dc2a56
  • Loading branch information
Yunqing Wang committed Aug 19, 2024
1 parent 5d20cc3 commit a5ea71f
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 42 deletions.
37 changes: 32 additions & 5 deletions vp9/encoder/vp9_encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -1042,7 +1042,7 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
vpx_free_frame_buffer(&cpi->last_frame_uf);
vpx_free_frame_buffer(&cpi->scaled_source);
vpx_free_frame_buffer(&cpi->scaled_last_source);
vpx_free_frame_buffer(&cpi->alt_ref_buffer);
vpx_free_frame_buffer(&cpi->tf_buffer);
#ifdef ENABLE_KF_DENOISE
vpx_free_frame_buffer(&cpi->raw_unscaled_source);
vpx_free_frame_buffer(&cpi->raw_scaled_source);
Expand Down Expand Up @@ -1299,15 +1299,15 @@ static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
"Failed to allocate lag buffers");

// TODO(agrange) Check if ARF is enabled and skip allocation if not.
if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, oxcf->width, oxcf->height,
if (vpx_realloc_frame_buffer(&cpi->tf_buffer, oxcf->width, oxcf->height,
cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth,
#endif
VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate altref buffer");
"Failed to allocate temporal filter buffer");
}

static void alloc_util_frame_buffers(VP9_COMP *cpi) {
Expand Down Expand Up @@ -6460,7 +6460,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
#endif
// Produce the filtered ARF frame.
vp9_temporal_filter(cpi, arf_src_index);
vpx_extend_frame_borders(&cpi->alt_ref_buffer);
vpx_extend_frame_borders(&cpi->tf_buffer);
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, vp9_temporal_filter_time);
#endif
Expand All @@ -6470,7 +6470,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
if (cpi->oxcf.alt_ref_aq != 0 && not_low_bitrate && not_last_frame)
vp9_alt_ref_aq_setup_mode(cpi->alt_ref_aq, cpi);

force_src_buffer = &cpi->alt_ref_buffer;
force_src_buffer = &cpi->tf_buffer;
}
#endif
cm->show_frame = 0;
Expand Down Expand Up @@ -6587,6 +6587,26 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
} else if (oxcf->pass == 1) {
set_frame_size(cpi);
}

// Key frame temporal filtering
const int is_key_temporal_filter_enabled =
oxcf->enable_keyframe_filtering && cpi->oxcf.mode != REALTIME &&
(oxcf->pass != 1) && !cpi->use_svc &&
!is_lossless_requested(&cpi->oxcf) && cm->frame_type == KEY_FRAME &&
(oxcf->arnr_max_frames > 0) && (oxcf->arnr_strength > 0) &&
cpi->oxcf.speed < 2;
// Save the pointer to the original source image.
YV12_BUFFER_CONFIG *source_buffer = cpi->un_scaled_source;

if (is_key_temporal_filter_enabled && source != NULL) {
// Produce the filtered Key frame. Set distance to -1 since the key frame
// is already popped out.
vp9_temporal_filter(cpi, -1);
vpx_extend_frame_borders(&cpi->tf_buffer);
force_src_buffer = &cpi->tf_buffer;
cpi->un_scaled_source = cpi->Source =
force_src_buffer ? force_src_buffer : &source->img;
}
#endif // !CONFIG_REALTIME_ONLY

if (oxcf->pass != 1 && cpi->level_constraint.level_index >= 0 &&
Expand Down Expand Up @@ -6717,6 +6737,13 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
if (cpi->keep_level_stats && oxcf->pass != 1)
update_level_info(cpi, size, arf_src_index);

#if !CONFIG_REALTIME_ONLY
if (is_key_temporal_filter_enabled && cpi->b_calculate_psnr) {
cpi->raw_source_frame = vp9_scale_if_required(
cm, source_buffer, &cpi->scaled_source, (oxcf->pass == 0), EIGHTTAP, 0);
}
#endif // !CONFIG_REALTIME_ONLY

#if CONFIG_INTERNAL_STATS

if (oxcf->pass != 1 && !cpi->last_frame_dropped) {
Expand Down
5 changes: 4 additions & 1 deletion vp9/encoder/vp9_encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ typedef struct VP9EncoderConfig {

int enable_tpl_model;

int enable_keyframe_filtering;

int max_threads;

unsigned int target_level;
Expand Down Expand Up @@ -503,6 +505,7 @@ typedef struct ARNRFilterData {
int frame_count;
int alt_ref_index;
struct scale_factors sf;
YV12_BUFFER_CONFIG *dst;
} ARNRFilterData;

typedef struct EncFrameBuf {
Expand Down Expand Up @@ -872,7 +875,7 @@ typedef struct VP9_COMP {
// Force recalculation of segment_ids for each mode info
uint8_t force_update_segmentation;

YV12_BUFFER_CONFIG alt_ref_buffer;
YV12_BUFFER_CONFIG tf_buffer;

// class responsible for adaptive
// quantization of altref frames
Expand Down
77 changes: 41 additions & 36 deletions vp9/encoder/vp9_temporal_filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ static int64_t highbd_index_mult[14] = { 0U, 0U, 0U,
// Prediction function using 12-tap interpolation filter.
// TODO(yunqing): add SIMD optimization.
#define MAX_FILTER_TAP 12
#define TF_INTERP_EXTEND 6
typedef int16_t InterpKernel12[MAX_FILTER_TAP];
// 12-tap filter (used by the encoder only).
DECLARE_ALIGNED(256, static const InterpKernel12,
Expand Down Expand Up @@ -861,6 +862,7 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
DECLARE_ALIGNED(16, uint16_t, count[BLK_PELS * 3]);
MACROBLOCKD *mbd = &td->mb.e_mbd;
YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
YV12_BUFFER_CONFIG *dst = arnr_filter_data->dst;
uint8_t *dst1, *dst2;
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, predictor16[BLK_PELS * 3]);
Expand All @@ -886,18 +888,17 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,

// Source frames are extended to 16 pixels. This is different than
// L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS)
// A 6/8 tap filter is used for motion search. This requires 2 pixels
// before and 3 pixels after. So the largest Y mv on a border would
// then be 16 - VP9_INTERP_EXTEND. The UV blocks are half the size of the
// Y and therefore only extended by 8. The largest mv that a UV block
// can support is 8 - VP9_INTERP_EXTEND. A UV mv is half of a Y mv.
// (16 - VP9_INTERP_EXTEND) >> 1 which is greater than
// 8 - VP9_INTERP_EXTEND.
// To keep the mv in play for both Y and UV planes the max that it
// can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1).
td->mb.mv_limits.row_min = -((mb_row * BH) + (17 - 2 * VP9_INTERP_EXTEND));
// A 6/8/12 tap filter is used for motion search and prediction. So the
// largest Y mv on a border would then be 16 - TF_INTERP_EXTEND. The UV
// blocks are half the size of the Y and therefore only extended by 8.
// The largest mv that a UV block can support is 8 - TF_INTERP_EXTEND.
// A UV mv is half of a Y mv. (16 - TF_INTERP_EXTEND) >> 1 is greater than
// 8 - TF_INTERP_EXTEND. To keep the mv in play for both Y and UV planes,
// the margin applied on a border by the mv limits below is
// 17 - 2 * TF_INTERP_EXTEND.
td->mb.mv_limits.row_min = -((mb_row * BH) + (17 - 2 * TF_INTERP_EXTEND));
td->mb.mv_limits.row_max =
((mb_rows - 1 - mb_row) * BH) + (17 - 2 * VP9_INTERP_EXTEND);
((mb_rows - 1 - mb_row) * BH) + (17 - 2 * TF_INTERP_EXTEND);

for (mb_col = mb_col_start; mb_col < mb_col_end; mb_col++) {
int i, j, k;
Expand All @@ -907,9 +908,9 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
vp9_zero_array(accumulator, BLK_PELS * 3);
vp9_zero_array(count, BLK_PELS * 3);

td->mb.mv_limits.col_min = -((mb_col * BW) + (17 - 2 * VP9_INTERP_EXTEND));
td->mb.mv_limits.col_min = -((mb_col * BW) + (17 - 2 * TF_INTERP_EXTEND));
td->mb.mv_limits.col_max =
((mb_cols - 1 - mb_col) * BW) + (17 - 2 * VP9_INTERP_EXTEND);
((mb_cols - 1 - mb_col) * BW) + (17 - 2 * TF_INTERP_EXTEND);

if (cpi->oxcf.content == VP9E_CONTENT_FILM) {
unsigned int src_variance;
Expand Down Expand Up @@ -1054,9 +1055,9 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
uint16_t *dst1_16;
uint16_t *dst2_16;
// Normalize filter output to produce AltRef frame
dst1 = cpi->alt_ref_buffer.y_buffer;
dst1 = dst->y_buffer;
dst1_16 = CONVERT_TO_SHORTPTR(dst1);
stride = cpi->alt_ref_buffer.y_stride;
stride = dst->y_stride;
byte = mb_y_offset;
for (i = 0, k = 0; i < BH; i++) {
for (j = 0; j < BW; j++, k++) {
Expand All @@ -1073,11 +1074,11 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
byte += stride - BW;
}

dst1 = cpi->alt_ref_buffer.u_buffer;
dst2 = cpi->alt_ref_buffer.v_buffer;
dst1 = dst->u_buffer;
dst2 = dst->v_buffer;
dst1_16 = CONVERT_TO_SHORTPTR(dst1);
dst2_16 = CONVERT_TO_SHORTPTR(dst2);
stride = cpi->alt_ref_buffer.uv_stride;
stride = dst->uv_stride;
byte = mb_uv_offset;
for (i = 0, k = BLK_PELS; i < mb_uv_height; i++) {
for (j = 0; j < mb_uv_width; j++, k++) {
Expand All @@ -1103,8 +1104,8 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
}
} else {
// Normalize filter output to produce AltRef frame
dst1 = cpi->alt_ref_buffer.y_buffer;
stride = cpi->alt_ref_buffer.y_stride;
dst1 = dst->y_buffer;
stride = dst->y_stride;
byte = mb_y_offset;
for (i = 0, k = 0; i < BH; i++) {
for (j = 0; j < BW; j++, k++) {
Expand All @@ -1120,9 +1121,9 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
byte += stride - BW;
}

dst1 = cpi->alt_ref_buffer.u_buffer;
dst2 = cpi->alt_ref_buffer.v_buffer;
stride = cpi->alt_ref_buffer.uv_stride;
dst1 = dst->u_buffer;
dst2 = dst->v_buffer;
stride = dst->uv_stride;
byte = mb_uv_offset;
for (i = 0, k = BLK_PELS; i < mb_uv_height; i++) {
for (j = 0; j < mb_uv_width; j++, k++) {
Expand All @@ -1148,8 +1149,8 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
}
#else
// Normalize filter output to produce AltRef frame
dst1 = cpi->alt_ref_buffer.y_buffer;
stride = cpi->alt_ref_buffer.y_stride;
dst1 = dst->y_buffer;
stride = dst->y_stride;
byte = mb_y_offset;
for (i = 0, k = 0; i < BH; i++) {
for (j = 0; j < BW; j++, k++) {
Expand All @@ -1165,9 +1166,9 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
byte += stride - BW;
}

dst1 = cpi->alt_ref_buffer.u_buffer;
dst2 = cpi->alt_ref_buffer.v_buffer;
stride = cpi->alt_ref_buffer.uv_stride;
dst1 = dst->u_buffer;
dst2 = dst->v_buffer;
stride = dst->uv_stride;
byte = mb_uv_offset;
for (i = 0, k = BLK_PELS; i < mb_uv_height; i++) {
for (j = 0; j < mb_uv_width; j++, k++) {
Expand Down Expand Up @@ -1233,10 +1234,10 @@ static void adjust_arnr_filter(VP9_COMP *cpi, int distance, int group_boost,
int *arnr_frames, int *frames_backward,
int *frames_forward, int *arnr_strength) {
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;

int max_fwd = vp9_lookahead_depth(cpi->lookahead) - distance - 1;
int max_bwd = distance;
int max_fwd =
VPXMAX((int)vp9_lookahead_depth(cpi->lookahead) - distance - 1, 0);
int max_bwd = VPXMAX(distance, 0);
int frames = VPXMAX(oxcf->arnr_max_frames, 1);
int q, base_strength, strength;

Expand Down Expand Up @@ -1265,16 +1266,15 @@ static void adjust_arnr_filter(VP9_COMP *cpi, int distance, int group_boost,

// Adjust number of frames in filter and strength based on gf boost level.
frames = VPXMIN(frames, group_boost / 150);
frames += !(frames & 1); // Make the number odd.

if (strength > group_boost / 300) {
strength = group_boost / 300;
}

if (VPXMIN(max_fwd, max_bwd) >= frames / 2) {
// just use half half
// Handle the even/odd case.
*frames_backward = frames / 2;
*frames_forward = frames / 2;
*frames_forward = (frames - 1) / 2;
} else {
if (max_fwd < frames / 2) {
*frames_forward = max_fwd;
Expand All @@ -1297,8 +1297,7 @@ static void adjust_arnr_filter(VP9_COMP *cpi, int distance, int group_boost,
// TODO(jingning): Skip temporal filtering for intermediate frames that will
// be used as show_existing_frame. Need to further explore the possibility to
// apply certain filter.
if (gf_group->arf_src_offset[gf_group->index] <
cpi->rc.baseline_gf_interval - 1) {
if (frames <= 1) {
frames = 1;
*frames_backward = 0;
*frames_forward = 0;
Expand Down Expand Up @@ -1332,6 +1331,7 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
arnr_filter_data->strength = strength;
arnr_filter_data->frame_count = frames_to_blur;
arnr_filter_data->alt_ref_index = frames_to_blur_backward;
arnr_filter_data->dst = &cpi->tf_buffer;

// Setup frame pointers, NULL indicates frame not included in filter.
for (frame = 0; frame < frames_to_blur; ++frame) {
Expand All @@ -1341,6 +1341,11 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
frames[frames_to_blur - 1 - frame] = &buf->img;
}

YV12_BUFFER_CONFIG *f = frames[arnr_filter_data->alt_ref_index];
xd->cur_buf = f;
xd->plane[1].subsampling_y = f->subsampling_y;
xd->plane[1].subsampling_x = f->subsampling_x;

if (frames_to_blur > 0) {
// Setup scaling factors. Scaling on each of the arnr frames is not
// supported.
Expand Down
15 changes: 15 additions & 0 deletions vp9/vp9_cx_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ typedef struct vp9_extracfg {
unsigned int tile_columns;
unsigned int tile_rows;
unsigned int enable_tpl_model;
unsigned int enable_keyframe_filtering;
unsigned int arnr_max_frames;
unsigned int arnr_strength;
unsigned int min_gf_interval;
Expand Down Expand Up @@ -83,6 +84,7 @@ static struct vp9_extracfg default_extra_cfg = {
6, // tile_columns
0, // tile_rows
1, // enable_tpl_model
0, // enable_keyframe_filtering
7, // arnr_max_frames
5, // arnr_strength
0, // min_gf_interval; 0 -> default decision
Expand Down Expand Up @@ -614,6 +616,8 @@ static vpx_codec_err_t set_encoder_config(

oxcf->enable_tpl_model = extra_cfg->enable_tpl_model;

oxcf->enable_keyframe_filtering = extra_cfg->enable_keyframe_filtering;

// TODO(yunqing): The dependencies between row tiles cause error in multi-
// threaded encoding. For now, tile_rows is forced to be 0 in this case.
// The further fix can be done by adding synchronizations after a tile row
Expand Down Expand Up @@ -965,6 +969,14 @@ static vpx_codec_err_t ctrl_set_tpl_model(vpx_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}

// Control handler registered for VP9E_SET_KEY_FRAME_FILTERING.
// Works on a copy of the context's extra configuration: the value obtained
// from |args| through the CAST macro is stored in enable_keyframe_filtering,
// and the modified copy is then handed to update_extra_cfg(), whose result is
// returned to the caller unchanged.
static vpx_codec_err_t ctrl_set_keyframe_filtering(vpx_codec_alg_priv_t *ctx,
                                                   va_list args) {
  struct vp9_extracfg updated_cfg = ctx->extra_cfg;

  updated_cfg.enable_keyframe_filtering =
      CAST(VP9E_SET_KEY_FRAME_FILTERING, args);

  return update_extra_cfg(ctx, &updated_cfg);
}

static vpx_codec_err_t ctrl_set_arnr_max_frames(vpx_codec_alg_priv_t *ctx,
va_list args) {
struct vp9_extracfg extra_cfg = ctx->extra_cfg;
Expand Down Expand Up @@ -2108,6 +2120,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ VP9E_SET_TILE_COLUMNS, ctrl_set_tile_columns },
{ VP9E_SET_TILE_ROWS, ctrl_set_tile_rows },
{ VP9E_SET_TPL, ctrl_set_tpl_model },
{ VP9E_SET_KEY_FRAME_FILTERING, ctrl_set_keyframe_filtering },
{ VP8E_SET_ARNR_MAXFRAMES, ctrl_set_arnr_max_frames },
{ VP8E_SET_ARNR_STRENGTH, ctrl_set_arnr_strength },
{ VP8E_SET_ARNR_TYPE, ctrl_set_arnr_type },
Expand Down Expand Up @@ -2456,6 +2469,8 @@ void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf, FILE *fp) {

DUMP_STRUCT_VALUE(fp, oxcf, enable_tpl_model);

DUMP_STRUCT_VALUE(fp, oxcf, enable_keyframe_filtering);

DUMP_STRUCT_VALUE(fp, oxcf, max_threads);

DUMP_STRUCT_VALUE(fp, oxcf, target_level);
Expand Down
10 changes: 10 additions & 0 deletions vpx/vp8cx.h
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,14 @@ enum vp8e_enc_control_id {
*/
VP9E_SET_TPL,

/*!\brief Codec control function to enable key frame temporal filtering.
*
* Vp9 allows the encoder to run key frame temporal filtering and use it to
* improve the compression performance. To enable, set this parameter to be
* 1. The default value is set to be 0.
*/
VP9E_SET_KEY_FRAME_FILTERING,

/*!\brief Codec control function to enable postencode frame drop.
*
* This will allow encoder to drop frame after it's encoded.
Expand Down Expand Up @@ -1078,6 +1086,8 @@ VPX_CTRL_USE_TYPE(VP9E_SET_SVC_SPATIAL_LAYER_SYNC,
#define VPX_CTRL_VP9E_SET_SVC_SPATIAL_LAYER_SYNC
VPX_CTRL_USE_TYPE(VP9E_SET_TPL, int)
#define VPX_CTRL_VP9E_SET_TPL
VPX_CTRL_USE_TYPE(VP9E_SET_KEY_FRAME_FILTERING, int)
#define VPX_CTRL_VP9E_SET_KEY_FRAME_FILTERING
VPX_CTRL_USE_TYPE(VP9E_SET_POSTENCODE_DROP, unsigned int)
#define VPX_CTRL_VP9E_SET_POSTENCODE_DROP
VPX_CTRL_USE_TYPE(VP9E_SET_DELTA_Q_UV, int)
Expand Down
Loading

0 comments on commit a5ea71f

Please sign in to comment.