AOMedia AV1 Codec
svc_encoder_rtc
1 /*
2  * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 // This is an example demonstrating how to implement a multi-layer AOM
13 // encoding scheme for RTC video applications.
14 
15 #include <assert.h>
16 #include <inttypes.h>
17 #include <limits.h>
18 #include <math.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include <memory>
24 
25 #include "config/aom_config.h"
26 
27 #if CONFIG_AV1_DECODER
28 #include "aom/aom_decoder.h"
29 #endif
30 #include "aom/aom_encoder.h"
31 #include "aom/aom_image.h"
32 #include "aom/aom_integer.h"
33 #include "aom/aomcx.h"
34 #include "aom_dsp/bitwriter_buffer.h"
35 #include "aom_ports/aom_timer.h"
36 #include "av1/ratectrl_rtc.h"
37 #include "common/args.h"
38 #include "common/tools_common.h"
39 #include "common/video_writer.h"
40 #include "examples/encoder_util.h"
41 #include "examples/multilayer_metadata.h"
42 
// Size in bytes of the AppInput::options string buffer.
#define OPTION_BUFFER_SIZE 1024
// Upper bound on the number of input files (one AvxInputContext per file).
#define MAX_NUM_SPATIAL_LAYERS 4

// NOTE(review): not referenced in the visible part of this file; presumably
// selects the encoder usage/deadline further down -- confirm.
#define GOOD_QUALITY 0
47 
48 typedef struct {
49  const char *output_filename;
50  char options[OPTION_BUFFER_SIZE];
51  struct AvxInputContext input_ctx[MAX_NUM_SPATIAL_LAYERS];
52  int speed;
53  int aq_mode;
54  int layering_mode;
55  int output_obu;
56  int decode;
57  int tune_content;
58  int show_psnr;
59  bool use_external_rc;
60  bool scale_factors_explicitly_set;
61  const char *multilayer_metadata_file;
62 } AppInput;
63 
// Kinds of per-layer option lists understood by
// parse_layer_options_from_string(). The enumerator values double as indices
// into option_min_values[] / option_max_values[].
typedef enum {
  QUANTIZER = 0,
  BITRATE,
  SCALE_FACTOR,
  AUTO_ALT_REF,
  ALL_OPTION_TYPES  // Number of option types; not a valid option itself.
} LAYER_OPTION_TYPE;

// NOTE(review): not referenced in the visible part of this file; the names
// suggest segment feature ids for an ROI map -- confirm against the user.
enum { kSkip = 0, kDeltaQ = 1, kDeltaLF = 2, kReference = 3 };
73 
74 static const arg_def_t outputfile =
75  ARG_DEF("o", "output", 1, "Output filename");
76 static const arg_def_t frames_arg =
77  ARG_DEF("f", "frames", 1, "Number of frames to encode");
78 static const arg_def_t threads_arg =
79  ARG_DEF("th", "threads", 1, "Number of threads to use");
80 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
81 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
82 static const arg_def_t timebase_arg =
83  ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
84 static const arg_def_t bitrate_arg = ARG_DEF(
85  "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
86 static const arg_def_t spatial_layers_arg =
87  ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
88 static const arg_def_t temporal_layers_arg =
89  ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
90 static const arg_def_t layering_mode_arg =
91  ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
92 static const arg_def_t kf_dist_arg =
93  ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
94 static const arg_def_t scale_factors_arg =
95  ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
96 static const arg_def_t min_q_arg =
97  ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
98 static const arg_def_t max_q_arg =
99  ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
100 static const arg_def_t speed_arg =
101  ARG_DEF("sp", "speed", 1, "Speed configuration");
102 static const arg_def_t aqmode_arg =
103  ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
104 static const arg_def_t bitrates_arg =
105  ARG_DEF("bl", "bitrates", 1,
106  "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
107 static const arg_def_t dropframe_thresh_arg =
108  ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
109 static const arg_def_t error_resilient_arg =
110  ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
111 static const arg_def_t output_obu_arg =
112  ARG_DEF(NULL, "output-obu", 1,
113  "Write OBUs when set to 1. Otherwise write IVF files.");
114 static const arg_def_t test_decode_arg =
115  ARG_DEF(NULL, "test-decode", 1,
116  "Attempt to test decoding the output when set to 1. Default is 1.");
117 static const arg_def_t psnr_arg =
118  ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
119 static const arg_def_t ext_rc_arg =
120  ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
121 static const struct arg_enum_list tune_content_enum[] = {
122  { "default", AOM_CONTENT_DEFAULT },
123  { "screen", AOM_CONTENT_SCREEN },
124  { "film", AOM_CONTENT_FILM },
125  { NULL, 0 }
126 };
127 static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
128  NULL, "tune-content", 1, "Tune content type", tune_content_enum);
129 #if CONFIG_CWG_E050
130 static const arg_def_t multilayer_metadata_file_arg =
131  ARG_DEF("ml", "multilayer_metadata_file", 1,
132  "Experimental: path to multilayer metadata file");
133 #endif
134 
135 #if CONFIG_AV1_HIGHBITDEPTH
136 static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 },
137  { "10", AOM_BITS_10 },
138  { NULL, 0 } };
139 
140 static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
141  "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum);
142 #endif // CONFIG_AV1_HIGHBITDEPTH
143 
144 static const arg_def_t *svc_args[] = {
145  &frames_arg,
146  &outputfile,
147  &width_arg,
148  &height_arg,
149  &timebase_arg,
150  &bitrate_arg,
151  &spatial_layers_arg,
152  &kf_dist_arg,
153  &scale_factors_arg,
154  &min_q_arg,
155  &max_q_arg,
156  &temporal_layers_arg,
157  &layering_mode_arg,
158  &threads_arg,
159  &aqmode_arg,
160 #if CONFIG_AV1_HIGHBITDEPTH
161  &bitdepth_arg,
162 #endif
163  &speed_arg,
164  &bitrates_arg,
165  &dropframe_thresh_arg,
166  &error_resilient_arg,
167  &output_obu_arg,
168  &test_decode_arg,
169  &tune_content_arg,
170  &psnr_arg,
171 #if CONFIG_CWG_E050
172  &multilayer_metadata_file_arg,
173 #endif
174  NULL,
175 };
176 
// Zero-fills the object Dest (pass the object itself, not a pointer to it).
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

// Program name printed in the usage message.
static const char *exec_name;
180 
181 void usage_exit(void) {
182  fprintf(stderr,
183  "Usage: %s <options> input_filename [input_filename ...] -o "
184  "output_filename\n",
185  exec_name);
186  fprintf(stderr, "Options:\n");
187  arg_show_usage(stderr, svc_args);
188  fprintf(
189  stderr,
190  "Input files must be y4m or yuv.\n"
191  "If multiple input files are specified, they correspond to spatial "
192  "layers, and there should be as many as there are spatial layers.\n"
193  "All input files must have the same width, height, frame rate and number "
194  "of frames.\n"
195  "If only one file is specified, it is used for all spatial layers.\n");
196  exit(EXIT_FAILURE);
197 }
198 
// Returns 1 if the first four bytes of `detect` are the Y4M signature
// "YUV4", 0 otherwise.
static int file_is_y4m(const char detect[4]) {
  return memcmp(detect, "YUV4", 4) == 0;
}
202 
// Returns 1 if the first four bytes of `detect` are the IVF signature
// "DKIF", 0 otherwise.
static int fourcc_is_ivf(const char detect[4]) {
  return memcmp(detect, "DKIF", 4) == 0;
}
209 
210 static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
211  1 };
212 
213 static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
214 
215 static void open_input_file(struct AvxInputContext *input,
217  /* Parse certain options from the input file, if possible */
218  input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
219  : set_binary_mode(stdin);
220 
221  if (!input->file) fatal("Failed to open input file");
222 
223  if (!fseeko(input->file, 0, SEEK_END)) {
224  /* Input file is seekable. Figure out how long it is, so we can get
225  * progress info.
226  */
227  input->length = ftello(input->file);
228  rewind(input->file);
229  }
230 
231  /* Default to 1:1 pixel aspect ratio. */
232  input->pixel_aspect_ratio.numerator = 1;
233  input->pixel_aspect_ratio.denominator = 1;
234 
235  /* For RAW input sources, these bytes will applied on the first frame
236  * in read_frame().
237  */
238  input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
239  input->detect.position = 0;
240 
241  if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
242  if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
243  input->only_i420) >= 0) {
244  input->file_type = FILE_TYPE_Y4M;
245  input->width = input->y4m.pic_w;
246  input->height = input->y4m.pic_h;
247  input->pixel_aspect_ratio.numerator = input->y4m.par_n;
248  input->pixel_aspect_ratio.denominator = input->y4m.par_d;
249  input->framerate.numerator = input->y4m.fps_n;
250  input->framerate.denominator = input->y4m.fps_d;
251  input->fmt = input->y4m.aom_fmt;
252  input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
253  } else {
254  fatal("Unsupported Y4M stream.");
255  }
256  } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
257  fatal("IVF is not supported as input.");
258  } else {
259  input->file_type = FILE_TYPE_RAW;
260  }
261 }
262 
263 static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
264  int *value0, int *value1) {
265  if (type == SCALE_FACTOR) {
266  *value0 = (int)strtol(input, &input, 10);
267  if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
268  *value1 = (int)strtol(input, &input, 10);
269 
270  if (*value0 < option_min_values[SCALE_FACTOR] ||
271  *value1 < option_min_values[SCALE_FACTOR] ||
272  *value0 > option_max_values[SCALE_FACTOR] ||
273  *value1 > option_max_values[SCALE_FACTOR] ||
274  *value0 > *value1) // num shouldn't be greater than den
276  } else {
277  *value0 = atoi(input);
278  if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
280  }
281  return AOM_CODEC_OK;
282 }
283 
284 static aom_codec_err_t parse_layer_options_from_string(
285  aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
286  int *option0, int *option1) {
288  char *input_string;
289  char *token;
290  const char *delim = ",";
291  int num_layers = svc_params->number_spatial_layers;
292  int i = 0;
293 
294  if (type == BITRATE)
295  num_layers =
296  svc_params->number_spatial_layers * svc_params->number_temporal_layers;
297 
298  if (input == NULL || option0 == NULL ||
299  (option1 == NULL && type == SCALE_FACTOR))
301 
302  const size_t input_length = strlen(input);
303  input_string = reinterpret_cast<char *>(malloc(input_length + 1));
304  if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
305  memcpy(input_string, input, input_length + 1);
306  token = strtok(input_string, delim); // NOLINT
307  for (i = 0; i < num_layers; ++i) {
308  if (token != NULL) {
309  res = extract_option(type, token, option0 + i, option1 + i);
310  if (res != AOM_CODEC_OK) break;
311  token = strtok(NULL, delim); // NOLINT
312  } else {
314  break;
315  }
316  }
317  free(input_string);
318  return res;
319 }
320 
321 static void parse_command_line(int argc, const char **argv_,
322  AppInput *app_input,
323  aom_svc_params_t *svc_params,
324  aom_codec_enc_cfg_t *enc_cfg) {
325  struct arg arg;
326  char **argv = NULL;
327  char **argi = NULL;
328  char **argj = NULL;
329  char string_options[1024] = { 0 };
330 
331  // Default settings
332  svc_params->number_spatial_layers = 1;
333  svc_params->number_temporal_layers = 1;
334  app_input->layering_mode = 0;
335  app_input->output_obu = 0;
336  app_input->decode = 1;
337  enc_cfg->g_threads = 1;
338  enc_cfg->rc_end_usage = AOM_CBR;
339 
340  // process command line options
341  argv = argv_dup(argc - 1, argv_ + 1);
342  if (!argv) {
343  fprintf(stderr, "Error allocating argument list\n");
344  exit(EXIT_FAILURE);
345  }
346  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
347  arg.argv_step = 1;
348 
349  if (arg_match(&arg, &outputfile, argi)) {
350  app_input->output_filename = arg.val;
351  } else if (arg_match(&arg, &width_arg, argi)) {
352  enc_cfg->g_w = arg_parse_uint(&arg);
353  } else if (arg_match(&arg, &height_arg, argi)) {
354  enc_cfg->g_h = arg_parse_uint(&arg);
355  } else if (arg_match(&arg, &timebase_arg, argi)) {
356  enc_cfg->g_timebase = arg_parse_rational(&arg);
357  } else if (arg_match(&arg, &bitrate_arg, argi)) {
358  enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
359  } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
360  svc_params->number_spatial_layers = arg_parse_uint(&arg);
361  } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
362  svc_params->number_temporal_layers = arg_parse_uint(&arg);
363  } else if (arg_match(&arg, &speed_arg, argi)) {
364  app_input->speed = arg_parse_uint(&arg);
365  if (app_input->speed > 11) {
366  aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
367  }
368  } else if (arg_match(&arg, &aqmode_arg, argi)) {
369  app_input->aq_mode = arg_parse_uint(&arg);
370  } else if (arg_match(&arg, &threads_arg, argi)) {
371  enc_cfg->g_threads = arg_parse_uint(&arg);
372  } else if (arg_match(&arg, &layering_mode_arg, argi)) {
373  app_input->layering_mode = arg_parse_int(&arg);
374  } else if (arg_match(&arg, &kf_dist_arg, argi)) {
375  enc_cfg->kf_min_dist = arg_parse_uint(&arg);
376  enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
377  } else if (arg_match(&arg, &scale_factors_arg, argi)) {
378  aom_codec_err_t res = parse_layer_options_from_string(
379  svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
380  svc_params->scaling_factor_den);
381  app_input->scale_factors_explicitly_set = true;
382  if (res != AOM_CODEC_OK) {
383  die("Failed to parse scale factors: %s\n",
385  }
386  } else if (arg_match(&arg, &min_q_arg, argi)) {
387  enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
388  } else if (arg_match(&arg, &max_q_arg, argi)) {
389  enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
390 #if CONFIG_AV1_HIGHBITDEPTH
391  } else if (arg_match(&arg, &bitdepth_arg, argi)) {
392  enc_cfg->g_bit_depth =
393  static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
394  switch (enc_cfg->g_bit_depth) {
395  case AOM_BITS_8:
396  enc_cfg->g_input_bit_depth = 8;
397  enc_cfg->g_profile = 0;
398  break;
399  case AOM_BITS_10:
400  enc_cfg->g_input_bit_depth = 10;
401  enc_cfg->g_profile = 0;
402  break;
403  default:
404  die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
405  }
406 #endif // CONFIG_VP9_HIGHBITDEPTH
407  } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
408  enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
409  } else if (arg_match(&arg, &error_resilient_arg, argi)) {
410  enc_cfg->g_error_resilient = arg_parse_uint(&arg);
411  if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
412  die("Invalid value for error resilient (0, 1): %d.",
413  enc_cfg->g_error_resilient);
414  } else if (arg_match(&arg, &output_obu_arg, argi)) {
415  app_input->output_obu = arg_parse_uint(&arg);
416  if (app_input->output_obu != 0 && app_input->output_obu != 1)
417  die("Invalid value for obu output flag (0, 1): %d.",
418  app_input->output_obu);
419  } else if (arg_match(&arg, &test_decode_arg, argi)) {
420  app_input->decode = arg_parse_uint(&arg);
421  if (app_input->decode != 0 && app_input->decode != 1)
422  die("Invalid value for test decode flag (0, 1): %d.",
423  app_input->decode);
424  } else if (arg_match(&arg, &tune_content_arg, argi)) {
425  app_input->tune_content = arg_parse_enum_or_int(&arg);
426  printf("tune content %d\n", app_input->tune_content);
427  } else if (arg_match(&arg, &psnr_arg, argi)) {
428  app_input->show_psnr = 1;
429  } else if (arg_match(&arg, &ext_rc_arg, argi)) {
430  app_input->use_external_rc = true;
431 #if CONFIG_CWG_E050
432  } else if (arg_match(&arg, &multilayer_metadata_file_arg, argi)) {
433  app_input->multilayer_metadata_file = arg.val;
434 #endif
435  } else {
436  ++argj;
437  }
438  }
439 
440  // Total bitrate needs to be parsed after the number of layers.
441  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
442  arg.argv_step = 1;
443  if (arg_match(&arg, &bitrates_arg, argi)) {
444  aom_codec_err_t res = parse_layer_options_from_string(
445  svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
446  if (res != AOM_CODEC_OK) {
447  die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
448  }
449  } else {
450  ++argj;
451  }
452  }
453 
454  // There will be a space in front of the string options
455  if (strlen(string_options) > 0)
456  strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
457 
458  // Check for unrecognized options
459  for (argi = argv; *argi; ++argi)
460  if (argi[0][0] == '-' && strlen(argi[0]) > 1)
461  die("Error: Unrecognized option %s\n", *argi);
462 
463  if (argv[0] == NULL) {
464  usage_exit();
465  }
466 
467  int input_count = 0;
468  while (argv[input_count] != NULL && input_count < MAX_NUM_SPATIAL_LAYERS) {
469  app_input->input_ctx[input_count].filename = argv[input_count];
470  ++input_count;
471  }
472  if (input_count > 1 && input_count != svc_params->number_spatial_layers) {
473  die("Error: Number of input files does not match number of spatial layers");
474  }
475  if (argv[input_count] != NULL) {
476  die("Error: Too many input files specified, there should be at most %d",
477  MAX_NUM_SPATIAL_LAYERS);
478  }
479 
480  free(argv);
481 
482  for (int i = 0; i < input_count; ++i) {
483  open_input_file(&app_input->input_ctx[i], AOM_CSP_UNKNOWN);
484  if (app_input->input_ctx[i].file_type == FILE_TYPE_Y4M) {
485  if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
486  // Override these settings with the info from Y4M file.
487  enc_cfg->g_w = app_input->input_ctx[i].width;
488  enc_cfg->g_h = app_input->input_ctx[i].height;
489  // g_timebase is the reciprocal of frame rate.
490  enc_cfg->g_timebase.num = app_input->input_ctx[i].framerate.denominator;
491  enc_cfg->g_timebase.den = app_input->input_ctx[i].framerate.numerator;
492  } else if (enc_cfg->g_w != app_input->input_ctx[i].width ||
493  enc_cfg->g_h != app_input->input_ctx[i].height ||
494  enc_cfg->g_timebase.num !=
495  app_input->input_ctx[i].framerate.denominator ||
496  enc_cfg->g_timebase.den !=
497  app_input->input_ctx[i].framerate.numerator) {
498  die("Error: Input file dimensions and/or frame rate mismatch");
499  }
500  }
501  }
502  if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
503  die("Error: Input file dimensions not set, use -w and -h");
504  }
505 
506  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
507  enc_cfg->g_h % 2)
508  die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
509 
510  printf(
511  "Codec %s\n"
512  "layers: %d\n"
513  "width %u, height: %u\n"
514  "num: %d, den: %d, bitrate: %u\n"
515  "gop size: %u\n",
517  svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
518  enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
519  enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
520 }
521 
// Number of temporal layers for each of the 12 table entries.
// NOTE(review): presumably indexed by layering mode (0..11) -- confirm.
static const int mode_to_num_temporal_layers[12] = {
  1, 2, 3, 3, 2, 1, 1, 3, 3, 3, 3, 3,
};
// Number of spatial layers for each of the 12 table entries (same indexing
// as mode_to_num_temporal_layers).
static const int mode_to_num_spatial_layers[12] = {
  1, 1, 1, 1, 1, 2, 3, 2, 3, 3, 3, 3,
};
528 
529 // For rate control encoding stats.
530 struct RateControlMetrics {
531  // Number of input frames per layer.
532  int layer_input_frames[AOM_MAX_TS_LAYERS];
533  // Number of encoded non-key frames per layer.
534  int layer_enc_frames[AOM_MAX_TS_LAYERS];
535  // Framerate per layer layer (cumulative).
536  double layer_framerate[AOM_MAX_TS_LAYERS];
537  // Target average frame size per layer (per-frame-bandwidth per layer).
538  double layer_pfb[AOM_MAX_LAYERS];
539  // Actual average frame size per layer.
540  double layer_avg_frame_size[AOM_MAX_LAYERS];
541  // Average rate mismatch per layer (|target - actual| / target).
542  double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
543  // Actual encoding bitrate per layer (cumulative across temporal layers).
544  double layer_encoding_bitrate[AOM_MAX_LAYERS];
545  // Average of the short-time encoder actual bitrate.
546  // TODO(marpan): Should we add these short-time stats for each layer?
547  double avg_st_encoding_bitrate;
548  // Variance of the short-time encoder actual bitrate.
549  double variance_st_encoding_bitrate;
550  // Window (number of frames) for computing short-timee encoding bitrate.
551  int window_size;
552  // Number of window measurements.
553  int window_count;
554  int layer_target_bitrate[AOM_MAX_LAYERS];
555 };
556 
// Number of reference buffer slots (size of the refresh[] loop in
// set_layer_pattern()).
static const int REF_FRAMES = 8;

// Number of inter references per frame (LAST .. ALTREF).
static const int INTER_REFS_PER_FRAME = 7;

// Reference frames used in this example encoder. The values index the
// ref_idx[] and reference[] arrays of aom_svc_ref_frame_config_t.
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME,
  SVC_LAST3_FRAME,
  SVC_GOLDEN_FRAME,
  SVC_BWDREF_FRAME,
  SVC_ALTREF2_FRAME,
  SVC_ALTREF_FRAME
};
571 
572 static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
573  FILE *f = input_ctx->file;
574  y4m_input *y4m = &input_ctx->y4m;
575  int shortread = 0;
576 
577  if (input_ctx->file_type == FILE_TYPE_Y4M) {
578  if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
579  } else {
580  shortread = read_yuv_frame(input_ctx, img);
581  }
582 
583  return !shortread;
584 }
585 
586 static void close_input_file(struct AvxInputContext *input) {
587  fclose(input->file);
588  if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
589 }
590 
591 // Note: these rate control metrics assume only 1 key frame in the
592 // sequence (i.e., first frame only). So for temporal pattern# 7
593 // (which has key frame for every frame on base layer), the metrics
594 // computation will be off/wrong.
595 // TODO(marpan): Update these metrics to account for multiple key frames
596 // in the stream.
597 static void set_rate_control_metrics(struct RateControlMetrics *rc,
598  double framerate, int ss_number_layers,
599  int ts_number_layers) {
600  int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
601  ts_rate_decimator[0] = 1;
602  if (ts_number_layers == 2) {
603  ts_rate_decimator[0] = 2;
604  ts_rate_decimator[1] = 1;
605  }
606  if (ts_number_layers == 3) {
607  ts_rate_decimator[0] = 4;
608  ts_rate_decimator[1] = 2;
609  ts_rate_decimator[2] = 1;
610  }
611  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
612  // per-frame-bandwidth, for the rate control encoding stats below.
613  for (int sl = 0; sl < ss_number_layers; ++sl) {
614  int i = sl * ts_number_layers;
615  rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
616  rc->layer_pfb[i] =
617  1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
618  for (int tl = 0; tl < ts_number_layers; ++tl) {
619  i = sl * ts_number_layers + tl;
620  if (tl > 0) {
621  rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
622  rc->layer_pfb[i] =
623  1000.0 *
624  (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
625  (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
626  }
627  rc->layer_input_frames[tl] = 0;
628  rc->layer_enc_frames[tl] = 0;
629  rc->layer_encoding_bitrate[i] = 0.0;
630  rc->layer_avg_frame_size[i] = 0.0;
631  rc->layer_avg_rate_mismatch[i] = 0.0;
632  }
633  }
634  rc->window_count = 0;
635  rc->window_size = 15;
636  rc->avg_st_encoding_bitrate = 0.0;
637  rc->variance_st_encoding_bitrate = 0.0;
638 }
639 
640 static void printout_rate_control_summary(struct RateControlMetrics *rc,
641  int frame_cnt, int ss_number_layers,
642  int ts_number_layers) {
643  int tot_num_frames = 0;
644  double perc_fluctuation = 0.0;
645  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
646  printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
647  for (int sl = 0; sl < ss_number_layers; ++sl) {
648  tot_num_frames = 0;
649  for (int tl = 0; tl < ts_number_layers; ++tl) {
650  int i = sl * ts_number_layers + tl;
651  const int num_dropped =
652  tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
653  : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
654  tot_num_frames += rc->layer_input_frames[tl];
655  rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
656  rc->layer_encoding_bitrate[i] /
657  tot_num_frames;
658  rc->layer_avg_frame_size[i] =
659  rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
660  rc->layer_avg_rate_mismatch[i] =
661  100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
662  printf("For layer#: %d %d \n", sl, tl);
663  printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
664  rc->layer_encoding_bitrate[i]);
665  printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
666  rc->layer_avg_frame_size[i]);
667  printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
668  printf(
669  "Number of input frames, encoded (non-key) frames, "
670  "and perc dropped frames: %d %d %f\n",
671  rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
672  100.0 * num_dropped / rc->layer_input_frames[tl]);
673  printf("\n");
674  }
675  }
676  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
677  rc->variance_st_encoding_bitrate =
678  rc->variance_st_encoding_bitrate / rc->window_count -
679  (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
680  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
681  rc->avg_st_encoding_bitrate;
682  printf("Short-time stats, for window of %d frames:\n", rc->window_size);
683  printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
684  rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
685  perc_fluctuation);
686  if (frame_cnt - 1 != tot_num_frames)
687  die("Error: Number of input frames not equal to output!\n");
688 }
689 
690 // Layer pattern configuration.
691 static void set_layer_pattern(
692  int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
693  aom_svc_ref_frame_config_t *ref_frame_config,
694  aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
695  int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed,
696  int *reference_updated, int test_roi_map) {
697  // Setting this flag to 1 enables simplex example of
698  // RPS (Reference Picture Selection) for 1 layer.
699  int use_rps_example = 0;
700  int i;
701  int enable_longterm_temporal_ref = 1;
702  int shift = (layering_mode == 8) ? 2 : 0;
703  int simulcast_mode = (layering_mode == 11);
704  *use_svc_control = 1;
705  layer_id->spatial_layer_id = spatial_layer_id;
706  int lag_index = 0;
707  int base_count = superframe_cnt >> 2;
708  ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST
709  ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST
710  ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST
711  // Set the reference map buffer idx for the 7 references:
712  // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
713  // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
714  for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
715  for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
716  for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
717 
718  if (ksvc_mode) {
719  // Same pattern as case 9, but the reference strucutre will be constrained
720  // below.
721  layering_mode = 9;
722  }
723  switch (layering_mode) {
724  case 0:
725  if (use_rps_example == 0) {
726  // 1-layer: update LAST on every frame, reference LAST.
727  layer_id->temporal_layer_id = 0;
728  layer_id->spatial_layer_id = 0;
729  ref_frame_config->refresh[0] = 1;
730  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
731  // Add additional reference (GOLDEN) if test_roi_map is set,
732  // to test reference frame feature on segment.
733  if (test_roi_map) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
734  } else {
735  // Pattern of 2 references (ALTREF and GOLDEN) trailing
736  // LAST by 4 and 8 frames, with some switching logic to
737  // sometimes only predict from the longer-term reference
738  //(golden here). This is simple example to test RPS
739  // (reference picture selection).
740  int last_idx = 0;
741  int last_idx_refresh = 0;
742  int gld_idx = 0;
743  int alt_ref_idx = 0;
744  int lag_alt = 4;
745  int lag_gld = 8;
746  layer_id->temporal_layer_id = 0;
747  layer_id->spatial_layer_id = 0;
748  int sh = 8; // slots 0 - 7.
749  // Moving index slot for last: 0 - (sh - 1)
750  if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
751  // Moving index for refresh of last: one ahead for next frame.
752  last_idx_refresh = superframe_cnt % sh;
753  // Moving index for gld_ref, lag behind current by lag_gld
754  if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
755  // Moving index for alt_ref, lag behind LAST by lag_alt frames.
756  if (superframe_cnt > lag_alt)
757  alt_ref_idx = (superframe_cnt - lag_alt) % sh;
758  // Set the ref_idx.
759  // Default all references to slot for last.
760  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
761  ref_frame_config->ref_idx[i] = last_idx;
762  // Set the ref_idx for the relevant references.
763  ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
764  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
765  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
766  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
767  // Refresh this slot, which will become LAST on next frame.
768  ref_frame_config->refresh[last_idx_refresh] = 1;
769  // Reference LAST, ALTREF, and GOLDEN
770  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
771  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
772  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
773  // Switch to only GOLDEN every 300 frames.
774  if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
775  ref_frame_config->reference[SVC_LAST_FRAME] = 0;
776  ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
777  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
778  // Test if the long-term is LAST instead, this is just a renaming
779  // but its tests if encoder behaves the same, whether its
780  // LAST or GOLDEN.
781  if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
782  ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
783  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
784  ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
785  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
786  }
787  }
788  }
789  break;
790  case 1:
791  // 2-temporal layer.
792  // 1 3 5
793  // 0 2 4
794  // Keep golden fixed at slot 3.
795  base_count = superframe_cnt >> 1;
796  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
797  // Cyclically refresh slots 5, 6, 7, for lag alt ref.
798  lag_index = 5;
799  if (base_count > 0) {
800  lag_index = 5 + (base_count % 3);
801  if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
802  }
803  // Set the altref slot to lag_index.
804  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
805  if (superframe_cnt % 2 == 0) {
806  layer_id->temporal_layer_id = 0;
807  // Update LAST on layer 0, reference LAST.
808  ref_frame_config->refresh[0] = 1;
809  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
810  // Refresh lag_index slot, needed for lagging golen.
811  ref_frame_config->refresh[lag_index] = 1;
812  // Refresh GOLDEN every x base layer frames.
813  if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
814  } else {
815  layer_id->temporal_layer_id = 1;
816  // No updates on layer 1, reference LAST (TL0).
817  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
818  }
819  // Always reference golden and altref on TL0.
820  if (layer_id->temporal_layer_id == 0) {
821  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
822  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
823  }
824  break;
825  case 2:
826  // 3-temporal layer:
827  // 1 3 5 7
828  // 2 6
829  // 0 4 8
830  if (superframe_cnt % 4 == 0) {
831  // Base layer.
832  layer_id->temporal_layer_id = 0;
833  // Update LAST on layer 0, reference LAST.
834  ref_frame_config->refresh[0] = 1;
835  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
836  } else if ((superframe_cnt - 1) % 4 == 0) {
837  layer_id->temporal_layer_id = 2;
838  // First top layer: no updates, only reference LAST (TL0).
839  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
840  } else if ((superframe_cnt - 2) % 4 == 0) {
841  layer_id->temporal_layer_id = 1;
842  // Middle layer (TL1): update LAST2, only reference LAST (TL0).
843  ref_frame_config->refresh[1] = 1;
844  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
845  } else if ((superframe_cnt - 3) % 4 == 0) {
846  layer_id->temporal_layer_id = 2;
847  // Second top layer: no updates, only reference LAST.
848  // Set buffer idx for LAST to slot 1, since that was the slot
849  // updated in previous frame. So LAST is TL1 frame.
850  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
851  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
852  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
853  }
854  break;
855  case 3:
856  // 3 TL, same as above, except allow for predicting
857  // off 2 more references (GOLDEN and ALTREF), with
858  // GOLDEN updated periodically, and ALTREF lagging from
859  // LAST from ~4 frames. Both GOLDEN and ALTREF
860  // can only be updated on base temporal layer.
861 
862  // Keep golden fixed at slot 3.
863  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
864  // Cyclically refresh slots 5, 6, 7, for lag altref.
865  lag_index = 5;
866  if (base_count > 0) {
867  lag_index = 5 + (base_count % 3);
868  if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
869  }
870  // Set the altref slot to lag_index.
871  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
872  if (superframe_cnt % 4 == 0) {
873  // Base layer.
874  layer_id->temporal_layer_id = 0;
875  // Update LAST on layer 0, reference LAST.
876  ref_frame_config->refresh[0] = 1;
877  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
878  // Refresh GOLDEN every x ~10 base layer frames.
879  if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
880  // Refresh lag_index slot, needed for lagging altref.
881  ref_frame_config->refresh[lag_index] = 1;
882  } else if ((superframe_cnt - 1) % 4 == 0) {
883  layer_id->temporal_layer_id = 2;
884  // First top layer: no updates, only reference LAST (TL0).
885  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
886  } else if ((superframe_cnt - 2) % 4 == 0) {
887  layer_id->temporal_layer_id = 1;
888  // Middle layer (TL1): update LAST2, only reference LAST (TL0).
889  ref_frame_config->refresh[1] = 1;
890  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
891  } else if ((superframe_cnt - 3) % 4 == 0) {
892  layer_id->temporal_layer_id = 2;
893  // Second top layer: no updates, only reference LAST.
894  // Set buffer idx for LAST to slot 1, since that was the slot
895  // updated in previous frame. So LAST is TL1 frame.
896  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
897  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
898  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
899  }
900  // Every frame can reference GOLDEN AND ALTREF.
901  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
902  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
903  // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
904  if (speed >= 7) {
905  ref_frame_comp_pred->use_comp_pred[2] = 1;
906  ref_frame_comp_pred->use_comp_pred[0] = 1;
907  }
908  break;
909  case 4:
910  // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
911  // only reference GF (not LAST). Other frames only reference LAST.
912  // 1 3 5 7
913  // 2 6
914  // 0 4 8
915  if (superframe_cnt % 4 == 0) {
916  // Base layer.
917  layer_id->temporal_layer_id = 0;
918  // Update LAST on layer 0, only reference LAST.
919  ref_frame_config->refresh[0] = 1;
920  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
921  } else if ((superframe_cnt - 1) % 4 == 0) {
922  layer_id->temporal_layer_id = 2;
923  // First top layer: no updates, only reference LAST (TL0).
924  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
925  } else if ((superframe_cnt - 2) % 4 == 0) {
926  layer_id->temporal_layer_id = 1;
927  // Middle layer (TL1): update GF, only reference LAST (TL0).
928  ref_frame_config->refresh[3] = 1;
929  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
930  } else if ((superframe_cnt - 3) % 4 == 0) {
931  layer_id->temporal_layer_id = 2;
932  // Second top layer: no updates, only reference GF.
933  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
934  }
935  break;
936 
937  case 5:
938  /*
939  // 2 spatial layers, 1 temporal, without temporal prediction on SL1.
940  layer_id->temporal_layer_id = 0;
941  if (layer_id->spatial_layer_id == 0) {
942  // Reference LAST, update LAST.
943  ref_frame_config->refresh[0] = 1;
944  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
945  } else if (layer_id->spatial_layer_id == 1) {
946  // Reference LAST, which is SL0, and no refresh.
947  ref_frame_config->refresh[0] = 0;
948  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
949  }
950  break;
951  */
952  // 2 spatial layers, 1 temporal.
953  layer_id->temporal_layer_id = 0;
954  if (layer_id->spatial_layer_id == 0) {
955  // Reference LAST, update LAST.
956  ref_frame_config->refresh[0] = 1;
957  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
958  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 2;
959  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
960  } else if (layer_id->spatial_layer_id == 1) {
961  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
962  // and GOLDEN to slot 0. Update slot 1 (LAST).
963  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
964  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
965  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 2;
966  ref_frame_config->refresh[1] = 1;
967  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
968  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
969  }
970  break;
971 
972  case 6:
973  // 3 spatial layers, 1 temporal.
974  // Note for this case, we set the buffer idx for all references to be
975  // either LAST or GOLDEN, which are always valid references, since decoder
976  // will check if any of the 7 references is valid scale in
977  // valid_ref_frame_size().
978  layer_id->temporal_layer_id = 0;
979  if (layer_id->spatial_layer_id == 0) {
980  // Reference LAST, update LAST. Set all buffer_idx to 0.
981  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
982  ref_frame_config->ref_idx[i] = 0;
983  ref_frame_config->refresh[0] = 1;
984  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
985  } else if (layer_id->spatial_layer_id == 1) {
986  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
987  // and GOLDEN (and all other refs) to slot 0.
988  // Update slot 1 (LAST).
989  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
990  ref_frame_config->ref_idx[i] = 0;
991  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
992  ref_frame_config->refresh[1] = 1;
993  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
994  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
995  } else if (layer_id->spatial_layer_id == 2) {
996  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
997  // and GOLDEN (and all other refs) to slot 1.
998  // Update slot 2 (LAST).
999  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1000  ref_frame_config->ref_idx[i] = 1;
1001  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1002  ref_frame_config->refresh[2] = 1;
1003  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1004  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1005  // For 3 spatial layer case: allow for top spatial layer to use
1006  // additional temporal reference. Update every 10 frames.
1007  if (enable_longterm_temporal_ref) {
1008  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1009  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1010  if (base_count % 10 == 0)
1011  ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1012  }
1013  }
1014  break;
1015  case 7:
1016  // 2 spatial and 3 temporal layer.
1017  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1018  if (superframe_cnt % 4 == 0) {
1019  // Base temporal layer
1020  layer_id->temporal_layer_id = 0;
1021  if (layer_id->spatial_layer_id == 0) {
1022  // Reference LAST, update LAST
1023  // Set all buffer_idx to 0
1024  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1025  ref_frame_config->ref_idx[i] = 0;
1026  ref_frame_config->refresh[0] = 1;
1027  } else if (layer_id->spatial_layer_id == 1) {
1028  // Reference LAST and GOLDEN.
1029  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1030  ref_frame_config->ref_idx[i] = 0;
1031  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1032  ref_frame_config->refresh[1] = 1;
1033  }
1034  } else if ((superframe_cnt - 1) % 4 == 0) {
1035  // First top temporal enhancement layer.
1036  layer_id->temporal_layer_id = 2;
1037  if (layer_id->spatial_layer_id == 0) {
1038  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1039  ref_frame_config->ref_idx[i] = 0;
1040  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1041  ref_frame_config->refresh[3] = 1;
1042  } else if (layer_id->spatial_layer_id == 1) {
1043  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1044  // GOLDEN (and all other refs) to slot 3.
1045  // No update.
1046  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1047  ref_frame_config->ref_idx[i] = 3;
1048  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1049  }
1050  } else if ((superframe_cnt - 2) % 4 == 0) {
1051  // Middle temporal enhancement layer.
1052  layer_id->temporal_layer_id = 1;
1053  if (layer_id->spatial_layer_id == 0) {
1054  // Reference LAST.
1055  // Set all buffer_idx to 0.
1056  // Set GOLDEN to slot 5 and update slot 5.
1057  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1058  ref_frame_config->ref_idx[i] = 0;
1059  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1060  ref_frame_config->refresh[5 - shift] = 1;
1061  } else if (layer_id->spatial_layer_id == 1) {
1062  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1063  // GOLDEN (and all other refs) to slot 5.
1064  // Set LAST3 to slot 6 and update slot 6.
1065  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1066  ref_frame_config->ref_idx[i] = 5 - shift;
1067  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1068  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1069  ref_frame_config->refresh[6 - shift] = 1;
1070  }
1071  } else if ((superframe_cnt - 3) % 4 == 0) {
1072  // Second top temporal enhancement layer.
1073  layer_id->temporal_layer_id = 2;
1074  if (layer_id->spatial_layer_id == 0) {
1075  // Set LAST to slot 5 and reference LAST.
1076  // Set GOLDEN to slot 3 and update slot 3.
1077  // Set all other buffer_idx to 0.
1078  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1079  ref_frame_config->ref_idx[i] = 0;
1080  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1081  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1082  ref_frame_config->refresh[3] = 1;
1083  } else if (layer_id->spatial_layer_id == 1) {
1084  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1085  // GOLDEN to slot 3. No update.
1086  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1087  ref_frame_config->ref_idx[i] = 0;
1088  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1089  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1090  }
1091  }
1092  break;
1093  case 8:
1094  // 3 spatial and 3 temporal layer.
1095  // Same as case 9 but with overlap in the buffer slot updates
1096  // (shift = 2). The slots 3 and 4 updated by first TL2 are
1097  // reused for update in TL1 superframe.
1098  // Note for this case, frame order hint must be disabled for
1099  // lower resolutions (operating points > 0) to be decodable.
1100  case 9:
1101  // 3 spatial and 3 temporal layer.
1102  // No overlap in buffer updates between TL2 and TL1.
1103  // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
1104  // Set the references via the svc_ref_frame_config control.
1105  // Always reference LAST.
1106  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1107  if (superframe_cnt % 4 == 0) {
1108  // Base temporal layer.
1109  layer_id->temporal_layer_id = 0;
1110  if (layer_id->spatial_layer_id == 0) {
1111  // Reference LAST, update LAST.
1112  // Set all buffer_idx to 0.
1113  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1114  ref_frame_config->ref_idx[i] = 0;
1115  ref_frame_config->refresh[0] = 1;
1116  } else if (layer_id->spatial_layer_id == 1) {
1117  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1118  // GOLDEN (and all other refs) to slot 0.
1119  // Update slot 1 (LAST).
1120  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1121  ref_frame_config->ref_idx[i] = 0;
1122  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1123  ref_frame_config->refresh[1] = 1;
1124  } else if (layer_id->spatial_layer_id == 2) {
1125  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1126  // GOLDEN (and all other refs) to slot 1.
1127  // Update slot 2 (LAST).
1128  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1129  ref_frame_config->ref_idx[i] = 1;
1130  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1131  ref_frame_config->refresh[2] = 1;
1132  }
1133  } else if ((superframe_cnt - 1) % 4 == 0) {
1134  // First top temporal enhancement layer.
1135  layer_id->temporal_layer_id = 2;
1136  if (layer_id->spatial_layer_id == 0) {
1137  // Reference LAST (slot 0).
1138  // Set GOLDEN to slot 3 and update slot 3.
1139  // Set all other buffer_idx to slot 0.
1140  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1141  ref_frame_config->ref_idx[i] = 0;
1142  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1143  ref_frame_config->refresh[3] = 1;
1144  } else if (layer_id->spatial_layer_id == 1) {
1145  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1146  // GOLDEN (and all other refs) to slot 3.
1147  // Set LAST2 to slot 4 and Update slot 4.
1148  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1149  ref_frame_config->ref_idx[i] = 3;
1150  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1151  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1152  ref_frame_config->refresh[4] = 1;
1153  } else if (layer_id->spatial_layer_id == 2) {
1154  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1155  // GOLDEN (and all other refs) to slot 4.
1156  // No update.
1157  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1158  ref_frame_config->ref_idx[i] = 4;
1159  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1160  }
1161  } else if ((superframe_cnt - 2) % 4 == 0) {
1162  // Middle temporal enhancement layer.
1163  layer_id->temporal_layer_id = 1;
1164  if (layer_id->spatial_layer_id == 0) {
1165  // Reference LAST.
1166  // Set all buffer_idx to 0.
1167  // Set GOLDEN to slot 5 and update slot 5.
1168  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1169  ref_frame_config->ref_idx[i] = 0;
1170  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1171  ref_frame_config->refresh[5 - shift] = 1;
1172  } else if (layer_id->spatial_layer_id == 1) {
1173  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1174  // GOLDEN (and all other refs) to slot 5.
1175  // Set LAST3 to slot 6 and update slot 6.
1176  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1177  ref_frame_config->ref_idx[i] = 5 - shift;
1178  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1179  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1180  ref_frame_config->refresh[6 - shift] = 1;
1181  } else if (layer_id->spatial_layer_id == 2) {
1182  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1183  // GOLDEN (and all other refs) to slot 6.
1184  // Set LAST3 to slot 7 and update slot 7.
1185  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1186  ref_frame_config->ref_idx[i] = 6 - shift;
1187  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1188  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1189  ref_frame_config->refresh[7 - shift] = 1;
1190  }
1191  } else if ((superframe_cnt - 3) % 4 == 0) {
1192  // Second top temporal enhancement layer.
1193  layer_id->temporal_layer_id = 2;
1194  if (layer_id->spatial_layer_id == 0) {
1195  // Set LAST to slot 5 and reference LAST.
1196  // Set GOLDEN to slot 3 and update slot 3.
1197  // Set all other buffer_idx to 0.
1198  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1199  ref_frame_config->ref_idx[i] = 0;
1200  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1201  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1202  ref_frame_config->refresh[3] = 1;
1203  } else if (layer_id->spatial_layer_id == 1) {
1204  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1205  // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1206  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1207  ref_frame_config->ref_idx[i] = 0;
1208  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1209  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1210  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1211  ref_frame_config->refresh[4] = 1;
1212  } else if (layer_id->spatial_layer_id == 2) {
1213  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1214  // GOLDEN to slot 4. No update.
1215  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1216  ref_frame_config->ref_idx[i] = 0;
1217  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1218  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1219  }
1220  }
1221  break;
1222  case 11:
1223  // Simulcast mode for 3 spatial and 3 temporal layers.
1224  // No inter-layer prediction, only prediction is temporal and single
1225  // reference (LAST).
1226  // No overlap in buffer slots between spatial layers. So for example,
1227  // SL0 only uses slots 0 and 1.
1228  // SL1 only uses slots 2 and 3.
1229  // SL2 only uses slots 4 and 5.
1230  // All 7 references for each inter-frame must only access buffer slots
1231  // for that spatial layer.
1232  // On key (super)frames: SL1 and SL2 must have no references set
1233  // and must refresh all the slots for that layer only (so 2 and 3
1234  // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1235  // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1236  // internally as Intra-only frames that allow that stream to be decoded.
1237  // These conditions will allow for each spatial stream to be
1238  // independently decodeable.
1239 
1240  // Initialize all references to 0 (don't use reference).
1241  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1242  ref_frame_config->reference[i] = 0;
1243  // Initialize as no refresh/update for all slots.
1244  for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1245  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1246  ref_frame_config->ref_idx[i] = 0;
1247 
1248  if (is_key_frame) {
1249  if (layer_id->spatial_layer_id == 0) {
1250  // Assign LAST/GOLDEN to slot 0/1.
1251  // Refresh slots 0 and 1 for SL0.
1252  // SL0: this will get set to KEY frame internally.
1253  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1254  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1255  ref_frame_config->refresh[0] = 1;
1256  ref_frame_config->refresh[1] = 1;
1257  } else if (layer_id->spatial_layer_id == 1) {
1258  // Assign LAST/GOLDEN to slot 2/3.
1259  // Refresh slots 2 and 3 for SL1.
1260  // This will get set to Intra-only frame internally.
1261  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1262  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1263  ref_frame_config->refresh[2] = 1;
1264  ref_frame_config->refresh[3] = 1;
1265  } else if (layer_id->spatial_layer_id == 2) {
1266  // Assign LAST/GOLDEN to slot 4/5.
1267  // Refresh slots 4 and 5 for SL2.
1268  // This will get set to Intra-only frame internally.
1269  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1270  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1271  ref_frame_config->refresh[4] = 1;
1272  ref_frame_config->refresh[5] = 1;
1273  }
1274  } else if (superframe_cnt % 4 == 0) {
1275  // Base temporal layer: TL0
1276  layer_id->temporal_layer_id = 0;
1277  if (layer_id->spatial_layer_id == 0) { // SL0
1278  // Reference LAST. Assign all references to either slot
1279  // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1280  // Update slot 0 (LAST).
1281  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1282  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1283  ref_frame_config->ref_idx[i] = 1;
1284  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1285  ref_frame_config->refresh[0] = 1;
1286  } else if (layer_id->spatial_layer_id == 1) { // SL1
1287  // Reference LAST. Assign all references to either slot
1288  // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1289  // Update slot 2 (LAST).
1290  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1291  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1292  ref_frame_config->ref_idx[i] = 3;
1293  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1294  ref_frame_config->refresh[2] = 1;
1295  } else if (layer_id->spatial_layer_id == 2) { // SL2
1296  // Reference LAST. Assign all references to either slot
1297  // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1298  // Update slot 4 (LAST).
1299  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1300  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1301  ref_frame_config->ref_idx[i] = 5;
1302  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1303  ref_frame_config->refresh[4] = 1;
1304  }
1305  } else if ((superframe_cnt - 1) % 4 == 0) {
1306  // First top temporal enhancement layer: TL2
1307  layer_id->temporal_layer_id = 2;
1308  if (layer_id->spatial_layer_id == 0) { // SL0
1309  // Reference LAST (slot 0). Assign other references to slot 1.
1310  // No update/refresh on any slots.
1311  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1312  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1313  ref_frame_config->ref_idx[i] = 1;
1314  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1315  } else if (layer_id->spatial_layer_id == 1) { // SL1
1316  // Reference LAST (slot 2). Assign other references to slot 3.
1317  // No update/refresh on any slots.
1318  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1319  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1320  ref_frame_config->ref_idx[i] = 3;
1321  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1322  } else if (layer_id->spatial_layer_id == 2) { // SL2
1323  // Reference LAST (slot 4). Assign other references to slot 5.
1324  // No update/refresh on any slots.
1325  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1326  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1327  ref_frame_config->ref_idx[i] = 5;
1328  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1329  }
1330  } else if ((superframe_cnt - 2) % 4 == 0) {
1331  // Middle temporal enhancement layer: TL1
1332  layer_id->temporal_layer_id = 1;
1333  if (layer_id->spatial_layer_id == 0) { // SL0
1334  // Reference LAST (slot 0).
1335  // Set GOLDEN to slot 1 and update slot 1.
1336  // This will be used as reference for next TL2.
1337  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1338  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1339  ref_frame_config->ref_idx[i] = 1;
1340  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1341  ref_frame_config->refresh[1] = 1;
1342  } else if (layer_id->spatial_layer_id == 1) { // SL1
1343  // Reference LAST (slot 2).
1344  // Set GOLDEN to slot 3 and update slot 3.
1345  // This will be used as reference for next TL2.
1346  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1347  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1348  ref_frame_config->ref_idx[i] = 3;
1349  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1350  ref_frame_config->refresh[3] = 1;
1351  } else if (layer_id->spatial_layer_id == 2) { // SL2
1352  // Reference LAST (slot 4).
1353  // Set GOLDEN to slot 5 and update slot 5.
1354  // This will be used as reference for next TL2.
1355  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1356  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1357  ref_frame_config->ref_idx[i] = 5;
1358  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1359  ref_frame_config->refresh[5] = 1;
1360  }
1361  } else if ((superframe_cnt - 3) % 4 == 0) {
1362  // Second top temporal enhancement layer: TL2
1363  layer_id->temporal_layer_id = 2;
1364  if (layer_id->spatial_layer_id == 0) { // SL0
1365  // Reference LAST (slot 1). Assign other references to slot 0.
1366  // No update/refresh on any slots.
1367  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1368  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1369  ref_frame_config->ref_idx[i] = 0;
1370  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1371  } else if (layer_id->spatial_layer_id == 1) { // SL1
1372  // Reference LAST (slot 3). Assign other references to slot 2.
1373  // No update/refresh on any slots.
1374  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1375  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1376  ref_frame_config->ref_idx[i] = 2;
1377  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1378  } else if (layer_id->spatial_layer_id == 2) { // SL2
1379  // Reference LAST (slot 5). Assign other references to slot 4.
1380  // No update/refresh on any slots.
1381  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1382  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1383  ref_frame_config->ref_idx[i] = 4;
1384  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1385  }
1386  }
1387  if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1388  // Always reference GOLDEN (inter-layer prediction).
1389  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1390  if (ksvc_mode) {
1391  // KSVC: only keep the inter-layer reference (GOLDEN) for
1392  // superframes whose base is key.
1393  if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1394  }
1395  if (is_key_frame && layer_id->spatial_layer_id > 1) {
1396  // On superframes whose base is key: remove LAST to avoid prediction
1397  // off layer two levels below.
1398  ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1399  }
1400  }
1401  // For 3 spatial layer case 8 (where there is free buffer slot):
1402  // allow for top spatial layer to use additional temporal reference.
1403  // Additional reference is only updated on base temporal layer, every
1404  // 10 TL0 frames here.
1405  if (!simulcast_mode && enable_longterm_temporal_ref &&
1406  layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1407  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1408  if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1409  if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1410  ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1411  }
1412  break;
1413  default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1414  }
1415  for (i = 0; i < REF_FRAMES; i++) {
1416  if (ref_frame_config->refresh[i] == 1) {
1417  *reference_updated = 1;
1418  break;
1419  }
1420  }
1421 }
1422 
1423 static void write_literal(struct aom_write_bit_buffer *wb, uint32_t data,
1424  uint8_t bits, uint32_t offset = 0) {
1425  if (bits > 32) {
1426  die("Invalid bits value %d > 32\n", bits);
1427  }
1428  const uint32_t max = static_cast<uint32_t>(((uint64_t)1 << bits) - 1);
1429  if (data < offset || (data - offset) > max) {
1430  die("Invalid data, value %u out of range [%u, %" PRIu64 "]\n", data, offset,
1431  (uint64_t)max + offset);
1432  }
1433  aom_wb_write_unsigned_literal(wb, data - offset, bits);
1434 }
1435 
1436 static void write_depth_representation_element(
1437  struct aom_write_bit_buffer *buffer,
1438  const std::pair<libaom_examples::DepthRepresentationElement, bool>
1439  &element) {
1440  if (!element.second) {
1441  return;
1442  }
1443  write_literal(buffer, element.first.sign_flag, 1);
1444  write_literal(buffer, element.first.exponent, 7);
1445  if (element.first.mantissa_len == 0 || element.first.mantissa_len > 32) {
1446  die("Invalid mantissan_len %d\n", element.first.mantissa_len);
1447  }
1448  write_literal(buffer, element.first.mantissa_len - 1, 5);
1449  write_literal(buffer, element.first.mantissa, element.first.mantissa_len);
1450 }
1451 
1452 static void write_color_properties(
1453  struct aom_write_bit_buffer *buffer,
1454  const std::pair<libaom_examples::ColorProperties, bool> &color_properties) {
1455  write_literal(buffer, color_properties.second, 1);
1456  if (color_properties.second) {
1457  write_literal(buffer, color_properties.first.color_range, 1);
1458  write_literal(buffer, color_properties.first.color_primaries, 8);
1459  write_literal(buffer, color_properties.first.transfer_characteristics, 8);
1460  write_literal(buffer, color_properties.first.matrix_coefficients, 8);
1461  } else {
1462  write_literal(buffer, 0, 1); // reserved_1bit
1463  }
1464 }
1465 
1466 static void add_multilayer_metadata(
1467  aom_image_t *frame, const libaom_examples::MultilayerMetadata &multilayer) {
1468  // Large enough buffer for the multilayer metadata.
1469  // Each layer's metadata is less than 100 bytes and there are at most 4
1470  // layers.
1471  std::vector<uint8_t> data(1024);
1472  struct aom_write_bit_buffer buffer = { data.data(), 0 };
1473 
1474  write_literal(&buffer, multilayer.use_case, 6);
1475  if (multilayer.layers.empty()) {
1476  die("Invalid multilayer metadata, no layers found\n");
1477  } else if (multilayer.layers.size() > MAX_NUM_SPATIAL_LAYERS) {
1478  die("Invalid multilayer metadata, too many layers (max is %d)\n",
1479  MAX_NUM_SPATIAL_LAYERS);
1480  }
1481  write_literal(&buffer, (int)multilayer.layers.size() - 1, 2);
1482  assert(buffer.bit_offset % 8 == 0);
1483  for (size_t i = 0; i < multilayer.layers.size(); ++i) {
1484  const libaom_examples::LayerMetadata &layer = multilayer.layers[i];
1485  // Alpha info with segmentation with labels can be up to about 66k bytes,
1486  // which requires 3 bytes to encode in leb128.
1487  const int bytes_reserved_for_size = 3;
1488  // Placeholder for layer_metadata_size which will be written later.
1489  write_literal(&buffer, 0, bytes_reserved_for_size * 8);
1490  const uint32_t metadata_start = buffer.bit_offset;
1491  write_literal(&buffer, (int)i, 2); // ml_spatial_id
1492  write_literal(&buffer, layer.layer_type, 5);
1493  write_literal(&buffer, layer.luma_plane_only_flag, 1);
1494  write_literal(&buffer, layer.layer_view_type, 3);
1495  write_literal(&buffer, layer.group_id, 2);
1496  write_literal(&buffer, layer.layer_dependency_idc, 3);
1497  write_literal(&buffer, layer.layer_metadata_scope, 2);
1498  write_literal(&buffer, 0, 4); // ml_reserved_4bits
1499 
1500  if (i > 0) {
1501  write_color_properties(&buffer, layer.layer_color_description);
1502  } else {
1503  write_literal(&buffer, 0, 2); // ml_reserved_2bits
1504  }
1505  assert(buffer.bit_offset % 8 == 0);
1506 
1507  if (layer.layer_type == libaom_examples::MULTILAYER_LAYER_TYPE_ALPHA &&
1508  layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1509  const libaom_examples::AlphaInformation &alpha_info =
1510  layer.global_alpha_info;
1511  write_literal(&buffer, alpha_info.alpha_use_idc, 2);
1512  write_literal(&buffer, alpha_info.alpha_simple_flag, 1);
1513  if (!alpha_info.alpha_simple_flag) {
1514  write_literal(&buffer, alpha_info.alpha_bit_depth, 3, /*offset=*/8);
1515  write_literal(&buffer, alpha_info.alpha_clip_idc, 2);
1516  write_literal(&buffer, alpha_info.alpha_incr_flag, 1);
1517  write_literal(&buffer, alpha_info.alpha_transparent_value,
1518  alpha_info.alpha_bit_depth + 1);
1519  write_literal(&buffer, alpha_info.alpha_opaque_value,
1520  alpha_info.alpha_bit_depth + 1);
1521  if (buffer.bit_offset % 8 != 0) {
1522  // ai_byte_alignment_bits
1523  write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1524  }
1525  assert(buffer.bit_offset % 8 == 0);
1526 
1527  write_literal(&buffer, 0, 6); // ai_reserved_6bits
1528  write_color_properties(&buffer, alpha_info.alpha_color_description);
1529  } else {
1530  write_literal(&buffer, 0, 5); // ai_reserved_5bits
1531  }
1532 
1533  assert(buffer.bit_offset % 8 == 0);
1534  } else if (layer.layer_type ==
1535  libaom_examples::MULTILAYER_LAYER_TYPE_DEPTH &&
1536  layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1537  const libaom_examples::DepthInformation &depth_info =
1538  layer.global_depth_info;
1539  write_literal(&buffer, depth_info.z_near.second, 1);
1540  write_literal(&buffer, depth_info.z_far.second, 1);
1541  write_literal(&buffer, depth_info.d_min.second, 1);
1542  write_literal(&buffer, depth_info.d_max.second, 1);
1543  write_literal(&buffer, depth_info.depth_representation_type, 4);
1544  if (depth_info.d_min.second || depth_info.d_max.second) {
1545  write_literal(&buffer, depth_info.disparity_ref_view_id, 2);
1546  }
1547  write_depth_representation_element(&buffer, depth_info.z_near);
1548  write_depth_representation_element(&buffer, depth_info.z_far);
1549  write_depth_representation_element(&buffer, depth_info.d_min);
1550  write_depth_representation_element(&buffer, depth_info.d_max);
1551  if (buffer.bit_offset % 8 != 0) {
1552  write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1553  }
1554  assert(buffer.bit_offset % 8 == 0);
1555  }
1556 
1557  assert(buffer.bit_offset % 8 == 0);
1558 
1559  const int metadata_size_bytes = (buffer.bit_offset - metadata_start) / 8;
1560  const uint8_t size_pos = metadata_start / 8 - bytes_reserved_for_size;
1561  size_t coded_size;
1562  if (aom_uleb_encode_fixed_size(metadata_size_bytes, bytes_reserved_for_size,
1563  bytes_reserved_for_size,
1564  &buffer.bit_buffer[size_pos], &coded_size)) {
1565  // Need to increase bytes_reserved_for_size in the code above.
1566  die("Error: Failed to write metadata size\n");
1567  }
1568  }
1569  assert(buffer.bit_offset % 8 == 0);
1570  if (aom_img_add_metadata(frame, 33 /*METADATA_TYPE_MULTILAYER*/,
1571  buffer.bit_buffer, buffer.bit_offset / 8,
1572  AOM_MIF_KEY_FRAME)) {
1573  die("Error: Failed to add metadata\n");
1574  }
1575 }
1576 
1577 #if CONFIG_AV1_DECODER
1578 // Returns whether there is a mismatch between the encoder's new frame and the
1579 // decoder's new frame.
1580 static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
1581  const int frames_out) {
1582  aom_image_t enc_img, dec_img;
1583  int mismatch = 0;
1584 
1585  /* Get the internal new frame */
1588 
1589 #if CONFIG_AV1_HIGHBITDEPTH
1590  if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
1591  (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
1592  if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1593  aom_image_t enc_hbd_img;
1594  aom_img_alloc(
1595  &enc_hbd_img,
1596  static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1597  enc_img.d_w, enc_img.d_h, 16);
1598  aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
1599  enc_img = enc_hbd_img;
1600  }
1601  if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1602  aom_image_t dec_hbd_img;
1603  aom_img_alloc(
1604  &dec_hbd_img,
1605  static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1606  dec_img.d_w, dec_img.d_h, 16);
1607  aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
1608  dec_img = dec_hbd_img;
1609  }
1610  }
1611 #endif
1612 
1613  if (!aom_compare_img(&enc_img, &dec_img)) {
1614  int y[4], u[4], v[4];
1615 #if CONFIG_AV1_HIGHBITDEPTH
1616  if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1617  aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
1618  } else {
1619  aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1620  }
1621 #else
1622  aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1623 #endif
1624  fprintf(stderr,
1625  "Encode/decode mismatch on frame %d at"
1626  " Y[%d, %d] {%d/%d},"
1627  " U[%d, %d] {%d/%d},"
1628  " V[%d, %d] {%d/%d}\n",
1629  frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
1630  v[1], v[2], v[3]);
1631  mismatch = 1;
1632  }
1633 
1634  aom_img_free(&enc_img);
1635  aom_img_free(&dec_img);
1636  return mismatch;
1637 }
1638 #endif // CONFIG_AV1_DECODER
1639 
// Running PSNR accumulators for the encoded stream.
// Every array has two slots: slot 0 is the one updated and reported by this
// example; slot 1 is reserved for high bitdepth.
struct psnr_stats {
  uint64_t psnr_sse_total[2];      // Sum of per-packet sse[0] values.
  uint64_t psnr_samples_total[2];  // Sum of per-packet samples[0] values.
  double psnr_totals[2][4];        // Sum of the four per-packet PSNR values.
  int psnr_count[2];               // Number of PSNR packets accumulated.
};
1647 
1648 static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1649  double ovpsnr;
1650 
1651  if (!psnr_stream->psnr_count[0]) return;
1652 
1653  fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1654  ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1655  (double)psnr_stream->psnr_sse_total[0]);
1656  fprintf(stderr, " %.3f", ovpsnr);
1657 
1658  for (int i = 0; i < 4; i++) {
1659  fprintf(stderr, " %.3f",
1660  psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1661  }
1662  fprintf(stderr, "\n");
1663 }
1664 
1665 static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1666  const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1667  aom::AV1RateControlRtcConfig rc_cfg;
1668  rc_cfg.width = cfg.g_w;
1669  rc_cfg.height = cfg.g_h;
1670  rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1671  rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1672  rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1673  rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1674  rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1675  rc_cfg.buf_sz = cfg.rc_buf_sz;
1676  rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1677  rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1678  // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1679  rc_cfg.max_intra_bitrate_pct = 300;
1680  rc_cfg.framerate = cfg.g_timebase.den;
1681  // TODO(jianj): Add suppor for SVC.
1682  rc_cfg.ss_number_layers = 1;
1683  rc_cfg.ts_number_layers = 1;
1684  rc_cfg.scaling_factor_num[0] = 1;
1685  rc_cfg.scaling_factor_den[0] = 1;
1686  rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1687  rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1688  rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1689  rc_cfg.aq_mode = app_input.aq_mode;
1690 
1691  return rc_cfg;
1692 }
1693 
// Maps an internal qindex (0-255 range) to the external quantizer (0-63
// range): returns the smallest quantizer whose table entry is at least
// qindex, or 63 when qindex is larger than every entry.
static int qindex_to_quantizer(int qindex) {
  // Entry q is the qindex that corresponds to external quantizer q.
  static const int quantizer_to_qindex[] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  const int last_quantizer = 63;

  // Walk up the table while the entry is still below the requested qindex;
  // stopping at the last slot gives the saturated result.
  int quantizer = 0;
  while (quantizer < last_quantizer &&
         quantizer_to_qindex[quantizer] < qindex) {
    ++quantizer;
  }
  return quantizer;
}
1709 
1710 static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1711  aom_codec_ctx_t *codec, int frame_cnt) {
1712  aom_active_map_t map = { 0, 0, 0 };
1713 
1714  map.rows = (cfg->g_h + 15) / 16;
1715  map.cols = (cfg->g_w + 15) / 16;
1716 
1717  map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1718  if (!map.active_map) die("Failed to allocate active map");
1719 
1720  // Example map for testing.
1721  for (unsigned int i = 0; i < map.rows; ++i) {
1722  for (unsigned int j = 0; j < map.cols; ++j) {
1723  int index = map.cols * i + j;
1724  map.active_map[index] = 1;
1725  if (frame_cnt < 300) {
1726  if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1727  } else if (frame_cnt >= 300) {
1728  if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1729  }
1730  }
1731  }
1732 
1733  if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1734  die_codec(codec, "Failed to set active map");
1735 
1736  free(map.active_map);
1737 }
1738 
1739 static void set_roi_map(const aom_codec_enc_cfg_t *cfg, aom_codec_ctx_t *codec,
1740  int roi_feature) {
1741  aom_roi_map_t roi = aom_roi_map_t();
1742  const int block_size = 4;
1743  roi.rows = (cfg->g_h + block_size - 1) / block_size;
1744  roi.cols = (cfg->g_w + block_size - 1) / block_size;
1745  memset(&roi.skip, 0, sizeof(roi.skip));
1746  memset(&roi.delta_q, 0, sizeof(roi.delta_q));
1747  memset(&roi.delta_lf, 0, sizeof(roi.delta_lf));
1748  memset(roi.ref_frame, -1, sizeof(roi.ref_frame));
1749  // Set ROI map to be 1 (segment #1) in middle square of image,
1750  // 0 elsewhere.
1751  roi.enabled = 1;
1752  roi.roi_map = (uint8_t *)calloc(roi.rows * roi.cols, sizeof(*roi.roi_map));
1753  for (unsigned int i = 0; i < roi.rows; ++i) {
1754  for (unsigned int j = 0; j < roi.cols; ++j) {
1755  const int idx = i * roi.cols + j;
1756  if (i > roi.rows / 4 && i < (3 * roi.rows) / 4 && j > roi.cols / 4 &&
1757  j < (3 * roi.cols) / 4)
1758  roi.roi_map[idx] = 1;
1759  else
1760  roi.roi_map[idx] = 0;
1761  }
1762  }
1763  // Set the ROI feature, on segment #1.
1764  if (roi_feature == kSkip)
1765  roi.skip[1] = 1;
1766  else if (roi_feature == kDeltaQ)
1767  roi.delta_q[1] = -40;
1768  else if (roi_feature == kDeltaLF)
1769  roi.delta_lf[1] = 40;
1770  else if (roi_feature == kReference)
1771  roi.ref_frame[1] = 4; // GOLDEN_FRAME
1772 
1773  if (aom_codec_control(codec, AOME_SET_ROI_MAP, &roi))
1774  die_codec(codec, "Failed to set roi map");
1775 
1776  free(roi.roi_map);
1777 }
// Entry point of the SVC RTC encoder example.
//
// Visible flow: set default input/encoder settings, parse the command line,
// validate the layer counts and the total target bitrate, open one output
// file per (spatial, temporal) layer plus one file holding all layers,
// initialize the encoder (and optionally a decoder), then for every input
// frame encode each spatial layer in turn, write the resulting packets,
// accumulate rate-control / PSNR / timing statistics and optionally check
// encoder/decoder mismatch. Finally prints the statistics and releases the
// codec contexts, writers and the raw image.
//
// NOTE(review): this listing has gaps (see the embedded line numbers); some
// statements, e.g. the ones that declare/assign `res` or start several codec
// control calls, are not visible here.
1778 int main(int argc, const char **argv) {
1779  AppInput app_input;
1780  AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1781  FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1782  AvxVideoWriter *total_layer_file = NULL;
1783  FILE *total_layer_obu_file = NULL;
1784  aom_codec_enc_cfg_t cfg;
1785  int frame_cnt = 0;
1786  aom_image_t raw;
1787  int frame_avail;
1788  int got_data = 0;
1789  int flags = 0;
1790  int i;
1791  int pts = 0; // PTS starts at 0.
1792  int frame_duration = 1; // 1 timebase tick per frame.
1793  aom_svc_layer_id_t layer_id;
1794  aom_svc_params_t svc_params;
1795  aom_svc_ref_frame_config_t ref_frame_config;
1796  aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1797 
1798 #if CONFIG_INTERNAL_STATS
1799  FILE *stats_file = fopen("opsnr.stt", "a");
1800  if (stats_file == NULL) {
1801  die("Cannot open opsnr.stt\n");
1802  }
1803 #endif
1804 #if CONFIG_AV1_DECODER
1805  aom_codec_ctx_t decoder;
1806 #endif
1807 
1808  struct RateControlMetrics rc;
1809  int64_t cx_time = 0;
1810  int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers.
1811  int frame_cnt_layer[AOM_MAX_LAYERS];
1812  double sum_bitrate = 0.0;
1813  double sum_bitrate2 = 0.0;
1814  double framerate = 30.0;
1815  int use_svc_control = 1;
1816  int set_err_resil_frame = 0;
1817  int test_changing_bitrate = 0;
1818  zero(rc.layer_target_bitrate);
1819  memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1820  memset(&app_input, 0, sizeof(AppInput));
1821  memset(&svc_params, 0, sizeof(svc_params));
1822 
1823  // Flag to test dynamic scaling of source frames for single
1824  // spatial stream, using the scaling_mode control.
1825  const int test_dynamic_scaling_single_layer = 0;
1826 
1827  // Flag to test setting speed per layer.
1828  const int test_speed_per_layer = 0;
1829 
1830  // Flag for testing active maps.
1831  const int test_active_maps = 0;
1832 
1833  // Flag for testing roi map.
1834  const int test_roi_map = 0;
1835 
1836  /* Setup default input stream settings */
1837  for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
1838  app_input.input_ctx[i].framerate.numerator = 30;
1839  app_input.input_ctx[i].framerate.denominator = 1;
1840  app_input.input_ctx[i].only_i420 = 0;
1841  app_input.input_ctx[i].bit_depth = AOM_BITS_8;
1842  }
1843  app_input.speed = 7;
1844  exec_name = argv[0];
1845 
1846  // start with default encoder configuration
  // NOTE(review): `res` is tested below but no declaration or assignment of
  // it is visible in this listing; the statements inside this #if/#else
  // appear to be missing.
1847 #if GOOD_QUALITY
1850 #else
1853 #endif
1854  if (res != AOM_CODEC_OK) {
1855  die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1856  }
1857 
1858 #if GOOD_QUALITY
1860 #else
1861  // Real time parameters.
1863 #endif
1864 
1865  cfg.rc_end_usage = AOM_CBR;
1866  cfg.rc_min_quantizer = 2;
1867  cfg.rc_max_quantizer = 52;
1868  cfg.rc_undershoot_pct = 50;
1869  cfg.rc_overshoot_pct = 50;
1870  cfg.rc_buf_initial_sz = 600;
1871  cfg.rc_buf_optimal_sz = 600;
1872  cfg.rc_buf_sz = 1000;
1873  cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize.
1874  cfg.g_lag_in_frames = 0;
1875  cfg.kf_mode = AOM_KF_AUTO;
1876  cfg.g_w = 0; // Force user to specify width and height for raw input.
1877  cfg.g_h = 0;
1878 
1879  parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1880 
1881  int ts_number_layers = svc_params.number_temporal_layers;
1882  int ss_number_layers = svc_params.number_spatial_layers;
1883 
1884  unsigned int width = cfg.g_w;
1885  unsigned int height = cfg.g_h;
1886 
  // A non-negative layering mode must agree with the requested layer counts.
1887  if (app_input.layering_mode >= 0) {
1888  if (ts_number_layers !=
1889  mode_to_num_temporal_layers[app_input.layering_mode] ||
1890  ss_number_layers !=
1891  mode_to_num_spatial_layers[app_input.layering_mode]) {
1892  die("Number of layers doesn't match layering mode.");
1893  }
1894  }
1895 
  // NOTE(review): input_ctx is declared with MAX_NUM_SPATIAL_LAYERS (4)
  // elements, but this loop is bounded by AOM_MAX_LAYERS. If AOM_MAX_LAYERS is
  // larger and the first four inputs are all Y4M, the loop reads past the end
  // of the array - confirm and consider bounding by MAX_NUM_SPATIAL_LAYERS.
1896  bool has_non_y4m_input = false;
1897  for (i = 0; i < AOM_MAX_LAYERS; ++i) {
1898  if (app_input.input_ctx[i].file_type != FILE_TYPE_Y4M) {
1899  has_non_y4m_input = true;
1900  break;
1901  }
1902  }
1903  // Y4M reader has its own allocation.
1904  if (has_non_y4m_input) {
1905  if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1906  die("Failed to allocate image (%dx%d)", width, height);
1907  }
1908  }
1909 
1910  aom_codec_iface_t *encoder = aom_codec_av1_cx();
1911 
1912  memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1913  sizeof(svc_params.layer_target_bitrate));
1914 
  // The sum of the top temporal layer bitrate of every spatial layer must
  // equal the configured total target bitrate.
1915  unsigned int total_rate = 0;
1916  for (i = 0; i < ss_number_layers; i++) {
1917  total_rate +=
1918  svc_params
1919  .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1920  }
1921  if (total_rate != cfg.rc_target_bitrate) {
1922  die("Incorrect total target bitrate, expected: %d", total_rate);
1923  }
1924 
1925  svc_params.framerate_factor[0] = 1;
1926  if (ts_number_layers == 2) {
1927  svc_params.framerate_factor[0] = 2;
1928  svc_params.framerate_factor[1] = 1;
1929  } else if (ts_number_layers == 3) {
1930  svc_params.framerate_factor[0] = 4;
1931  svc_params.framerate_factor[1] = 2;
1932  svc_params.framerate_factor[2] = 1;
1933  }
1934 
1935  libaom_examples::MultilayerMetadata multilayer_metadata;
1936  if (app_input.multilayer_metadata_file != NULL) {
1937  if (!libaom_examples::parse_multilayer_file(
1938  app_input.multilayer_metadata_file, &multilayer_metadata)) {
1939  die("Failed to parse multilayer metadata");
1940  }
1941  libaom_examples::print_multilayer_metadata(multilayer_metadata);
1942  }
1943 
  // NOTE(review): presumably den and num are integers, in which case this
  // division truncates before the result is stored in the double - confirm.
1944  framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1945  set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1946 
1947  AvxVideoInfo info;
1948  info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1949  info.frame_width = cfg.g_w;
1950  info.frame_height = cfg.g_h;
1951  info.time_base.numerator = cfg.g_timebase.num;
1952  info.time_base.denominator = cfg.g_timebase.den;
1953  // Open an output file for each stream.
1954  for (int sl = 0; sl < ss_number_layers; ++sl) {
1955  for (int tl = 0; tl < ts_number_layers; ++tl) {
1956  i = sl * ts_number_layers + tl;
1957  char file_name[PATH_MAX];
1958  snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1959  app_input.output_filename, i);
1960  if (app_input.output_obu) {
1961  obu_files[i] = fopen(file_name, "wb");
1962  if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1963  } else {
1964  outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1965  if (!outfile[i]) die("Failed to open %s for writing", file_name);
1966  }
1967  }
1968  }
  // One more output that receives the packets of every layer.
1969  if (app_input.output_obu) {
1970  total_layer_obu_file = fopen(app_input.output_filename, "wb");
1971  if (!total_layer_obu_file)
1972  die("Failed to open %s for writing", app_input.output_filename);
1973  } else {
1974  total_layer_file =
1975  aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1976  if (!total_layer_file)
1977  die("Failed to open %s for writing", app_input.output_filename);
1978  }
1979 
1980  // Initialize codec.
1981  aom_codec_ctx_t codec;
1982  aom_codec_flags_t flag = 0;
1984  flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1985  if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1986  die_codec(&codec, "Failed to initialize encoder");
1987 
1988 #if CONFIG_AV1_DECODER
1989  if (app_input.decode) {
1990  if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1991  die_codec(&decoder, "Failed to initialize decoder");
1992  }
1993 #endif
1994 
1995  aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1996  aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
2006 #if GOOD_QUALITY
2011 #else
2016 #endif
2018 
2019  // Settings to reduce key frame encoding time.
2025 
2027 
2028  aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
2029  if (app_input.tune_content == AOM_CONTENT_SCREEN) {
2031  // INTRABC is currently disabled for rt mode, as it's too slow.
2033  }
2034 
2035  if (app_input.use_external_rc) {
2037  }
2038 
2040 
2043 
2045 
2046  svc_params.number_spatial_layers = ss_number_layers;
2047  svc_params.number_temporal_layers = ts_number_layers;
2048  for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
2049  svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
2050  svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
2051  }
2052  // SET QUANTIZER PER LAYER, E.G FOR 2 SPATIAL LAYERS:
2053  // svc_params.max_quantizers[0] = 40;
2054  // svc_params.min_quantizers[0] = 40;
2055  // svc_params.max_quantizers[1] = 50;
2056  // svc_params.min_quantizers[1] = 50;
2057 
  // Default scale factors: 1:1 for every layer, except that with 2 spatial
  // layers the base layer is 1:2 and with 3 spatial layers the two lower
  // layers are 1:4 and 1:2.
2058  if (!app_input.scale_factors_explicitly_set) {
2059  for (i = 0; i < ss_number_layers; ++i) {
2060  svc_params.scaling_factor_num[i] = 1;
2061  svc_params.scaling_factor_den[i] = 1;
2062  }
2063  if (ss_number_layers == 2) {
2064  svc_params.scaling_factor_num[0] = 1;
2065  svc_params.scaling_factor_den[0] = 2;
2066  } else if (ss_number_layers == 3) {
2067  svc_params.scaling_factor_num[0] = 1;
2068  svc_params.scaling_factor_den[0] = 4;
2069  svc_params.scaling_factor_num[1] = 1;
2070  svc_params.scaling_factor_den[1] = 2;
2071  }
2072  }
2073  aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
2074  // TODO(aomedia:3032): Configure KSVC in fixed mode.
2075 
2076  // This controls the maximum target size of the key frame.
2077  // For generating smaller key frames, use a smaller max_intra_size_pct
2078  // value, like 100 or 200.
2079  {
2080  const int max_intra_size_pct = 300;
2082  max_intra_size_pct);
2083  }
2084 
2085  for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
2086  cx_time_layer[lx] = 0;
2087  frame_cnt_layer[lx] = 0;
2088  }
2089 
  // rc_api stays empty unless the external rate controller was requested.
2090  std::unique_ptr<aom::AV1RateControlRTC> rc_api;
2091  if (app_input.use_external_rc) {
2092  const aom::AV1RateControlRtcConfig rc_cfg =
2093  create_rtc_rc_config(cfg, app_input);
2094  rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
2095  }
2096 
2097  frame_avail = 1;
2098  struct psnr_stats psnr_stream;
2099  memset(&psnr_stream, 0, sizeof(psnr_stream));
  // Main loop: keeps running while input frames remain or the encoder still
  // returned data on the previous iteration.
2100  while (frame_avail || got_data) {
2101  struct aom_usec_timer timer;
2102  frame_avail = read_frame(&(app_input.input_ctx[0]), &raw);
2103  // Loop over spatial layers.
2104  for (int slx = 0; slx < ss_number_layers; slx++) {
2105  if (slx > 0 && app_input.input_ctx[slx].filename != NULL) {
2106  const int previous_layer_frame_avail = frame_avail;
2107  frame_avail = read_frame(&(app_input.input_ctx[slx]), &raw);
2108  if (previous_layer_frame_avail != frame_avail) {
2109  die("Mismatch in number of frames between spatial layer input files");
2110  }
2111  }
2112 
2113  aom_codec_iter_t iter = NULL;
2114  const aom_codec_cx_pkt_t *pkt;
2115  int reference_updated = 0;
2116  int layer = 0;
2117  // Flag for superframe whose base is key.
  // NOTE(review): a cfg.kf_max_dist of 0 would make this a modulo by zero -
  // confirm the value is always positive here.
2118  int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
2119  // For flexible mode:
2120  if (app_input.layering_mode >= 0) {
2121  // Set the reference/update flags, layer_id, and reference_map
2122  // buffer index.
2123  set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
2124  &ref_frame_config, &ref_frame_comp_pred,
2125  &use_svc_control, slx, is_key_frame,
2126  (app_input.layering_mode == 10), app_input.speed,
2127  &reference_updated, test_roi_map);
2128  aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
2129  if (use_svc_control) {
2131  &ref_frame_config);
2133  &ref_frame_comp_pred);
2134  }
2135  if (app_input.multilayer_metadata_file != NULL) {
2136  add_multilayer_metadata(&raw, multilayer_metadata);
2137  }
2138  // Set the speed per layer.
2139  if (test_speed_per_layer) {
2140  int speed_per_layer = 10;
2141  if (layer_id.spatial_layer_id == 0) {
2142  if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
2143  if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
2144  if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
2145  } else if (layer_id.spatial_layer_id == 1) {
2146  if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
2147  if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
2148  if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
2149  } else if (layer_id.spatial_layer_id == 2) {
2150  if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
2151  if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
2152  if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
2153  }
2154  aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
2155  }
2156  } else {
2157  // Only up to 3 temporal layers supported in fixed mode.
2158  // Only need to set spatial and temporal layer_id: reference
2159  // prediction, refresh, and buffer_idx are set internally.
2160  layer_id.spatial_layer_id = slx;
2161  layer_id.temporal_layer_id = 0;
2162  if (ts_number_layers == 2) {
2163  layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
2164  } else if (ts_number_layers == 3) {
2165  if (frame_cnt % 2 != 0)
2166  layer_id.temporal_layer_id = 2;
2167  else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
2168  layer_id.temporal_layer_id = 1;
2169  }
2170  aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
2171  }
2172 
2173  if (set_err_resil_frame && cfg.g_error_resilient == 0) {
2174  // Set error_resilient per frame: off/0 for base layer and
2175  // on/1 for enhancement layer frames.
2176  // Note that this can only be done on the fly/per-frame/layer
2177  // if the config error_resilience is off/0. See the logic for updating
2178  // in set_encoder_config():
2179  // tool_cfg->error_resilient_mode =
2180  // cfg->g_error_resilient | extra_cfg->error_resilient_mode;
2181  const int err_resil_mode =
2182  layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
2184  err_resil_mode);
2185  }
2186 
  // Flat index of the current (spatial, temporal) layer.
2187  layer = slx * ts_number_layers + layer_id.temporal_layer_id;
2188  if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
2189 
2190  if (test_dynamic_scaling_single_layer) {
2191  // Example to scale source down by 2x2, then 4x4, and then back up to
2192  // 2x2, and then back to original.
2193  int frame_2x2 = 200;
2194  int frame_4x4 = 400;
2195  int frame_2x2up = 600;
2196  int frame_orig = 800;
2197  if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
2198  // Scale source down by 2x2.
2199  struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
2200  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2201  } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
2202  // Scale source down by 4x4.
2203  struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
2204  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2205  } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
2206  // Source back up to 2x2.
2207  struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
2208  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2209  } else if (frame_cnt >= frame_orig) {
2210  // Source back up to original resolution (no scaling).
2211  struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
2212  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2213  }
2214  if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
2215  frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
2216  // For dynamic resize testing on single layer: refresh all references
2217  // on the resized frame: this is to avoid decode error:
2218  // if resize goes down by >= 4x4 then libaom decoder will throw an
2219  // error that some reference (even though not used) is beyond the
2220  // limit size (must be smaller than 4x4).
2221  for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
2222  if (use_svc_control) {
2224  &ref_frame_config);
2226  &ref_frame_comp_pred);
2227  }
2228  }
2229  }
2230 
2231  // Change target_bitrate every other frame.
2232  if (test_changing_bitrate && frame_cnt % 2 == 0) {
2233  if (frame_cnt < 500)
2234  cfg.rc_target_bitrate += 10;
2235  else
2236  cfg.rc_target_bitrate -= 10;
2237  // Do big increase and decrease.
2238  if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
2239  if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
2240  if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
2241  // Call change_config, or bypass with new control.
2242  // res = aom_codec_enc_config_set(&codec, &cfg);
2244  cfg.rc_target_bitrate))
2245  die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
2246  }
2247 
  // External rate control: compute the QP for this frame and convert it
  // with qindex_to_quantizer() before passing it to the encoder.
2248  if (rc_api) {
2249  aom::AV1FrameParamsRTC frame_params;
2250  // TODO(jianj): Add support for SVC.
2251  frame_params.spatial_layer_id = 0;
2252  frame_params.temporal_layer_id = 0;
2253  frame_params.frame_type =
2254  is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
2255  rc_api->ComputeQP(frame_params);
2256  const int current_qp = rc_api->GetQP();
2258  qindex_to_quantizer(current_qp))) {
2259  die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
2260  }
2261  }
2262 
2263  if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
2264 
2265  if (test_roi_map) set_roi_map(&cfg, &codec, kDeltaQ);
2266 
2267  // Do the layer encode.
  // A NULL image is passed once no input frame is available.
2268  aom_usec_timer_start(&timer);
2269  if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
2270  die_codec(&codec, "Failed to encode frame");
2271  aom_usec_timer_mark(&timer);
2272  cx_time += aom_usec_timer_elapsed(&timer);
2273  cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
2274  frame_cnt_layer[layer] += 1;
2275 
2276  // Get the high motion content flag.
2277  int content_flag = 0;
2279  &content_flag)) {
2280  die_codec(&codec, "Failed to GET_HIGH_MOTION_CONTENT_SCREEN_RTC");
2281  }
2282 
2283  got_data = 0;
2284  // For simulcast (mode 11): write out each spatial layer to the file.
2285  int ss_layers_write = (app_input.layering_mode == 11)
2286  ? layer_id.spatial_layer_id + 1
2287  : ss_number_layers;
2288  while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
2289  switch (pkt->kind) {
  // NOTE(review): the case label for compressed frame packets is not
  // visible in this listing; the statements below up to the first `break`
  // handle a compressed frame.
  // The packet is written to the file of the current layer and of every
  // higher spatial/temporal layer (limited by ss_layers_write).
2291  for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
2292  ++sl) {
2293  for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
2294  ++tl) {
2295  int j = sl * ts_number_layers + tl;
2296  if (app_input.output_obu) {
2297  fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
2298  obu_files[j]);
2299  } else {
2300  aom_video_writer_write_frame(
2301  outfile[j],
2302  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2303  pkt->data.frame.sz, pts);
2304  }
2305  if (sl == layer_id.spatial_layer_id)
2306  rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
2307  }
2308  }
2309  got_data = 1;
2310  // Write everything into the top layer.
2311  if (app_input.output_obu) {
2312  fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
2313  total_layer_obu_file);
2314  } else {
2315  aom_video_writer_write_frame(
2316  total_layer_file,
2317  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2318  pkt->data.frame.sz, pts);
2319  }
2320  // Keep count of rate control stats per layer (for non-key).
2321  if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
2322  int j = layer_id.spatial_layer_id * ts_number_layers +
2323  layer_id.temporal_layer_id;
2324  assert(j >= 0);
2325  rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
2326  rc.layer_avg_rate_mismatch[j] +=
2327  fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
2328  rc.layer_pfb[j];
2329  if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
2330  }
2331 
2332  if (rc_api) {
2333  rc_api->PostEncodeUpdate(pkt->data.frame.sz);
2334  }
2335  // Update for short-time encoding bitrate states, for moving window
2336  // of size rc->window, shifted by rc->window / 2.
2337  // Ignore first window segment, due to key frame.
2338  // For spatial layers: only do this for top/highest SL.
2339  if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
2340  sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
2341  rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
2342  if (frame_cnt % rc.window_size == 0) {
2343  rc.window_count += 1;
2344  rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
2345  rc.variance_st_encoding_bitrate +=
2346  (sum_bitrate / rc.window_size) *
2347  (sum_bitrate / rc.window_size);
2348  sum_bitrate = 0.0;
2349  }
2350  }
2351  // Second shifted window.
2352  if (frame_cnt > rc.window_size + rc.window_size / 2 &&
2353  slx == ss_number_layers - 1) {
2354  sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
2355  if (frame_cnt > 2 * rc.window_size &&
2356  frame_cnt % rc.window_size == 0) {
2357  rc.window_count += 1;
2358  rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
2359  rc.variance_st_encoding_bitrate +=
2360  (sum_bitrate2 / rc.window_size) *
2361  (sum_bitrate2 / rc.window_size);
2362  sum_bitrate2 = 0.0;
2363  }
2364  }
2365 
2366 #if CONFIG_AV1_DECODER
2367  if (app_input.decode) {
2368  if (aom_codec_decode(
2369  &decoder,
2370  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2371  pkt->data.frame.sz, NULL))
2372  die_codec(&decoder, "Failed to decode frame");
2373  }
2374 #endif
2375 
2376  break;
2377  case AOM_CODEC_PSNR_PKT:
2378  if (app_input.show_psnr) {
2379  psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
2380  psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
2381  for (int plane = 0; plane < 4; plane++) {
2382  psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
2383  }
2384  psnr_stream.psnr_count[0]++;
2385  }
2386  break;
2387  default: break;
2388  }
2389  }
2390 #if CONFIG_AV1_DECODER
2391  if (got_data && app_input.decode) {
2392  // Don't look for mismatch on non reference frames.
2393  if (reference_updated) {
2394  if (test_decode(&codec, &decoder, frame_cnt)) {
2395 #if CONFIG_INTERNAL_STATS
2396  fprintf(stats_file, "First mismatch occurred in frame %d\n",
2397  frame_cnt);
2398  fclose(stats_file);
2399 #endif
2400  fatal("Mismatch seen");
2401  }
2402  }
2403  }
2404 #endif
2405  } // loop over spatial layers
2406  ++frame_cnt;
2407  pts += frame_duration;
2408  }
2409 
  // Input contexts are closed in order until the first one without a
  // filename.
2410  for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
2411  if (app_input.input_ctx[i].filename == NULL) {
2412  break;
2413  }
2414  close_input_file(&(app_input.input_ctx[i]));
2415  }
2416  printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2417  ts_number_layers);
2418 
  // NOTE(review): the divisions below use frame_cnt_layer[lx],
  // cx_time_layer[lx], frame_cnt and cx_time as divisors; any of them being
  // zero is not guarded against here.
2419  printf("\n");
2420  for (int slx = 0; slx < ss_number_layers; slx++)
2421  for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2422  int lx = slx * ts_number_layers + tlx;
2423  printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2424  slx, tlx, frame_cnt_layer[lx],
2425  (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2426  1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2427  }
2428 
2429  printf("\n");
2430  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2431  frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2432  1000000 * (double)frame_cnt / (double)cx_time);
2433 
2434  if (app_input.show_psnr) {
2435  show_psnr(&psnr_stream, 255.0);
2436  }
2437 
2438  if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2439 
2440 #if CONFIG_AV1_DECODER
2441  if (app_input.decode) {
2442  if (aom_codec_destroy(&decoder))
2443  die_codec(&decoder, "Failed to destroy decoder");
2444  }
2445 #endif
2446 
2447 #if CONFIG_INTERNAL_STATS
2448  fprintf(stats_file, "No mismatch detected in recon buffers\n");
2449  fclose(stats_file);
2450 #endif
2451 
2452  // Try to rewrite the output file headers with the actual frame count.
  // NOTE(review): only the AvxVideoWriter outputs are closed here; no
  // fclose() of obu_files[] / total_layer_obu_file is visible in this
  // function, and in OBU mode the outfile[] entries passed below are NULL.
2453  for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2454  aom_video_writer_close(outfile[i]);
2455  aom_video_writer_close(total_layer_file);
2456 
2457  if (has_non_y4m_input) {
2458  aom_img_free(&raw);
2459  }
2460  return EXIT_SUCCESS;
2461 }
Describes the decoder algorithm interface to applications.
Describes the encoder algorithm interface to applications.
Describes the aom image descriptor and associated operations.
@ AOM_MIF_KEY_FRAME
Definition: aom_image.h:166
@ AOM_CSP_UNKNOWN
Definition: aom_image.h:143
enum aom_chroma_sample_position aom_chroma_sample_position_t
List of chroma sample positions.
aom_image_t * aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
#define AOM_IMG_FMT_HIGHBITDEPTH
Definition: aom_image.h:38
@ AOM_IMG_FMT_I420
Definition: aom_image.h:45
enum aom_img_fmt aom_img_fmt_t
List of supported image formats.
int aom_img_add_metadata(aom_image_t *img, uint32_t type, const uint8_t *data, size_t sz, aom_metadata_insert_flags_t insert_flag)
Add metadata to image.
void aom_img_free(aom_image_t *img)
Close an image descriptor.
Provides definitions for using AOM or AV1 encoder algorithm within the aom Codec Interface.
#define AOM_MAX_LAYERS
Definition: aomcx.h:1779
aom_codec_iface_t * aom_codec_av1_cx(void)
The interface to the AV1 encoder.
#define AOM_MAX_TS_LAYERS
Definition: aomcx.h:1781
struct aom_roi_map aom_roi_map_t
aom region of interest map
@ AOM_FULL_SUPERFRAME_DROP
Definition: aomcx.h:1853
@ AV1E_SET_BITRATE_ONE_PASS_CBR
Codec control to set the target bitrate in kilobits per second, unsigned int parameter....
Definition: aomcx.h:1545
@ AV1E_SET_ENABLE_SMOOTH_INTRA
Codec control function to turn on / off smooth intra modes usage, int parameter.
Definition: aomcx.h:1081
@ AV1E_SET_ENABLE_TPL_MODEL
Codec control function to enable RDO modulated by frame temporal dependency, unsigned int parameter.
Definition: aomcx.h:418
@ AV1E_SET_AQ_MODE
Codec control function to set adaptive quantization mode, unsigned int parameter.
Definition: aomcx.h:478
@ AV1E_SET_SVC_LAYER_ID
Codec control function to set the layer id, aom_svc_layer_id_t* parameter.
Definition: aomcx.h:1293
@ AV1E_SET_SVC_REF_FRAME_CONFIG
Codec control function to set the reference frame config, aom_svc_ref_frame_config_t* parameter.
Definition: aomcx.h:1303
@ AV1E_SET_TUNE_CONTENT
Codec control function to set content type, aom_tune_content parameter.
Definition: aomcx.h:507
@ AOME_SET_ROI_MAP
Codec control function to pass an ROI map to encoder, aom_roi_map_t* parameter.
Definition: aomcx.h:185
@ AV1E_SET_CDF_UPDATE_MODE
Codec control function to set CDF update mode, unsigned int parameter.
Definition: aomcx.h:516
@ AV1E_SET_ENABLE_ANGLE_DELTA
Codec control function to turn on/off intra angle delta, int parameter.
Definition: aomcx.h:1128
@ AV1E_SET_MV_COST_UPD_FREQ
Control to set frequency of the cost updates for motion vectors, unsigned int parameter.
Definition: aomcx.h:1271
@ AV1E_SET_INTRA_DEFAULT_TX_ONLY
Control to use default tx type only for intra modes, int parameter.
Definition: aomcx.h:1220
@ AV1E_SET_SVC_REF_FRAME_COMP_PRED
Codec control function to set reference frame compound prediction. aom_svc_ref_frame_comp_pred_t* par...
Definition: aomcx.h:1408
@ AV1E_SET_ENABLE_INTRABC
Codec control function to turn on/off intra block copy mode, int parameter.
Definition: aomcx.h:1124
@ AV1E_SET_ENABLE_WARPED_MOTION
Codec control function to turn on / off warped motion usage at sequence level, int parameter.
Definition: aomcx.h:1049
@ AV1E_SET_RTC_EXTERNAL_RC
Codec control function to set flag for rate control used by external encoders.
Definition: aomcx.h:1444
@ AV1E_SET_COEFF_COST_UPD_FREQ
Control to set frequency of the cost updates for coefficients, unsigned int parameter.
Definition: aomcx.h:1251
@ AV1E_SET_ENABLE_CDEF
Codec control function to encode with CDEF, unsigned int parameter.
Definition: aomcx.h:681
@ AOME_SET_ACTIVEMAP
Codec control function to pass an Active map to encoder, aom_active_map_t* parameter.
Definition: aomcx.h:190
@ AV1E_SET_DV_COST_UPD_FREQ
Control to set frequency of the cost updates for intrabc motion vectors, unsigned int parameter.
Definition: aomcx.h:1374
@ AV1E_SET_SVC_FRAME_DROP_MODE
Codec control to set the frame drop mode for SVC, unsigned int parameter. The valid values are consta...
Definition: aomcx.h:1558
@ AV1E_SET_SVC_PARAMS
Codec control function to set SVC parameters, aom_svc_params_t* parameter.
Definition: aomcx.h:1298
@ AV1E_SET_ENABLE_FILTER_INTRA
Codec control function to turn on / off filter intra usage at sequence level, int parameter.
Definition: aomcx.h:1070
@ AV1E_SET_ENABLE_PALETTE
Codec control function to turn on/off palette mode, int parameter.
Definition: aomcx.h:1120
@ AV1E_SET_ENABLE_CFL_INTRA
Codec control function to turn on / off CFL uv intra mode usage, int parameter.
Definition: aomcx.h:1099
@ AOME_SET_MAX_INTRA_BITRATE_PCT
Codec control function to set max data rate for intra frames, unsigned int parameter.
Definition: aomcx.h:312
@ AV1E_SET_ERROR_RESILIENT_MODE
Codec control function to enable error_resilient_mode, int parameter.
Definition: aomcx.h:452
@ AV1E_SET_ENABLE_OBMC
Codec control function to predict with OBMC mode, unsigned int parameter.
Definition: aomcx.h:708
@ AV1E_SET_AUTO_TILES
Codec control to set auto tiling, unsigned int parameter. Value of 1 means encoder will set number of...
Definition: aomcx.h:1566
@ AV1E_SET_LOOPFILTER_CONTROL
Codec control to control loop filter.
Definition: aomcx.h:1424
@ AOME_SET_SCALEMODE
Codec control function to set encoder scaling mode for the next frame to be coded,...
Definition: aomcx.h:197
@ AV1E_SET_ENABLE_ORDER_HINT
Codec control function to turn on / off frame order hint (int parameter). Affects: joint compound mod...
Definition: aomcx.h:876
@ AV1E_SET_DELTAQ_MODE
Codec control function to set the delta q mode, unsigned int parameter.
Definition: aomcx.h:1148
@ AV1E_SET_POSTENCODE_DROP_RTC
Codec control to enable post encode frame drop for RTC encoding, int parameter.
Definition: aomcx.h:1582
@ AV1E_SET_ENABLE_GLOBAL_MOTION
Codec control function to turn on / off global motion usage for a sequence, int parameter.
Definition: aomcx.h:1039
@ AOME_SET_CPUUSED
Codec control function to set encoder internal speed settings, int parameter.
Definition: aomcx.h:220
@ AV1E_GET_HIGH_MOTION_CONTENT_SCREEN_RTC
Codec control to get the high motion content flag, used for screen content realtime (RTC) encoding,...
Definition: aomcx.h:1573
@ AV1E_SET_GF_CBR_BOOST_PCT
Boost percentage for Golden Frame in CBR mode, unsigned int parameter.
Definition: aomcx.h:349
@ AV1E_SET_QUANTIZER_ONE_PASS
Codec control to set quantizer for the next frame, int parameter.
Definition: aomcx.h:1507
@ AV1E_SET_MODE_COST_UPD_FREQ
Control to set frequency of the cost updates for mode, unsigned int parameter.
Definition: aomcx.h:1261
@ AV1E_SET_MAX_CONSEC_FRAME_DROP_MS_CBR
Codec control to set the maximum number of consecutive frame drops, in units of time (milliseconds),...
Definition: aomcx.h:1588
@ AV1_GET_NEW_FRAME_IMAGE
Codec control function to get a pointer to the new frame.
Definition: aom.h:70
const char * aom_codec_iface_name(aom_codec_iface_t *iface)
Return the name for a given interface.
enum aom_bit_depth aom_bit_depth_t
Bit depth for codec. This enumeration determines the bit depth of the codec.
aom_codec_err_t aom_codec_control(aom_codec_ctx_t *ctx, int ctrl_id,...)
Algorithm Control.
long aom_codec_flags_t
Initialization-time Feature Enabling.
Definition: aom_codec.h:232
const struct aom_codec_iface aom_codec_iface_t
Codec interface structure.
Definition: aom_codec.h:271
const char * aom_codec_err_to_string(aom_codec_err_t err)
Convert error number to printable string.
aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx)
Destroy a codec instance.
aom_codec_err_t
Algorithm return codes.
Definition: aom_codec.h:155
#define AOM_CODEC_CONTROL_TYPECHECKED(ctx, id, data)
aom_codec_control wrapper macro (adds type-checking, less flexible)
Definition: aom_codec.h:542
const void * aom_codec_iter_t
Iterator.
Definition: aom_codec.h:305
#define AOM_FRAME_IS_KEY
Definition: aom_codec.h:288
@ AOM_BITS_8
Definition: aom_codec.h:336
@ AOM_BITS_10
Definition: aom_codec.h:337
@ AOM_CODEC_INVALID_PARAM
An application-supplied parameter is not valid.
Definition: aom_codec.h:200
@ AOM_CODEC_MEM_ERROR
Memory operation failed.
Definition: aom_codec.h:163
@ AOM_CODEC_OK
Operation completed without error.
Definition: aom_codec.h:157
aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data, size_t data_sz, void *user_priv)
Decode data.
#define aom_codec_dec_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_dec_init_ver()
Definition: aom_decoder.h:129
#define AOM_USAGE_GOOD_QUALITY
usage parameter analogous to AV1 GOOD QUALITY mode.
Definition: aom_encoder.h:1016
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img, aom_codec_pts_t pts, unsigned long duration, aom_enc_frame_flags_t flags)
Encode a frame.
#define aom_codec_enc_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_enc_init_ver()
Definition: aom_encoder.h:945
aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg, unsigned int usage)
Get the default configuration for a usage.
#define AOM_USAGE_REALTIME
usage parameter analogous to AV1 REALTIME mode.
Definition: aom_encoder.h:1018
#define AOM_CODEC_USE_HIGHBITDEPTH
Definition: aom_encoder.h:80
#define AOM_CODEC_USE_PSNR
Initialization-time Feature Enabling.
Definition: aom_encoder.h:79
const aom_codec_cx_pkt_t * aom_codec_get_cx_data(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter)
Encoded data iterator.
@ AOM_CBR
Definition: aom_encoder.h:187
@ AOM_KF_AUTO
Definition: aom_encoder.h:202
@ AOM_CODEC_PSNR_PKT
Definition: aom_encoder.h:113
@ AOM_CODEC_CX_FRAME_PKT
Definition: aom_encoder.h:110
aom active region map
Definition: aomcx.h:1676
unsigned int rows
Definition: aomcx.h:1679
unsigned int cols
Definition: aomcx.h:1680
unsigned char * active_map
Specifies on (1) or off (0) for each 16x16 region within a frame.
Definition: aomcx.h:1678
Codec context structure.
Definition: aom_codec.h:315
Encoder output packet.
Definition: aom_encoder.h:122
enum aom_codec_cx_pkt_kind kind
Definition: aom_encoder.h:123
double psnr[4]
Definition: aom_encoder.h:145
union aom_codec_cx_pkt::@1 data
struct aom_codec_cx_pkt::@1::@2 frame
Encoder configuration structure.
Definition: aom_encoder.h:389
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition: aom_encoder.h:477
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition: aom_encoder.h:542
struct aom_rational g_timebase
Stream timebase units.
Definition: aom_encoder.h:491
unsigned int g_usage
Algorithm specific "usage" value.
Definition: aom_encoder.h:401
unsigned int rc_buf_sz
Decoder Buffer Size.
Definition: aom_encoder.h:707
unsigned int g_h
Height of the frame.
Definition: aom_encoder.h:437
enum aom_kf_mode kf_mode
Keyframe placement mode.
Definition: aom_encoder.h:770
enum aom_rc_mode rc_end_usage
Rate control algorithm to use.
Definition: aom_encoder.h:625
unsigned int g_threads
Maximum number of threads to use.
Definition: aom_encoder.h:409
unsigned int kf_min_dist
Keyframe minimum interval.
Definition: aom_encoder.h:779
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition: aom_encoder.h:520
unsigned int rc_buf_initial_sz
Decoder Buffer Initial Size.
Definition: aom_encoder.h:716
unsigned int g_profile
Bitstream profile to use.
Definition: aom_encoder.h:419
aom_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition: aom_encoder.h:469
unsigned int g_w
Width of the frame.
Definition: aom_encoder.h:428
unsigned int rc_undershoot_pct
Rate control adaptation undershoot control.
Definition: aom_encoder.h:683
unsigned int kf_max_dist
Keyframe maximum interval.
Definition: aom_encoder.h:788
aom_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition: aom_encoder.h:499
unsigned int rc_max_quantizer
Maximum (Worst Quality) Quantizer.
Definition: aom_encoder.h:670
unsigned int rc_buf_optimal_sz
Decoder Buffer Optimal Size.
Definition: aom_encoder.h:725
unsigned int rc_min_quantizer
Minimum (Best Quality) Quantizer.
Definition: aom_encoder.h:660
unsigned int rc_target_bitrate
Target data rate.
Definition: aom_encoder.h:646
unsigned int rc_resize_mode
Mode for spatial resampling, if supported by the codec.
Definition: aom_encoder.h:551
unsigned int rc_overshoot_pct
Rate control adaptation overshoot control.
Definition: aom_encoder.h:692
Image Descriptor.
Definition: aom_image.h:182
aom_img_fmt_t fmt
Definition: aom_image.h:183
unsigned int d_w
Definition: aom_image.h:197
unsigned int d_h
Definition: aom_image.h:198
int num
Definition: aom_encoder.h:165
int den
Definition: aom_encoder.h:166
aom region of interest map
Definition: aomcx.h:1654
unsigned int cols
Definition: aomcx.h:1660
int delta_lf[8]
Definition: aomcx.h:1662
int ref_frame[8]
Definition: aomcx.h:1664
unsigned int rows
Definition: aomcx.h:1659
unsigned char * roi_map
Definition: aomcx.h:1658
int delta_q[8]
Definition: aomcx.h:1661
uint8_t enabled
Definition: aomcx.h:1656
int skip[8]
Definition: aomcx.h:1663
aom image scaling mode
Definition: aomcx.h:1688
Struct for spatial and temporal layer ID.
Definition: aomcx.h:1784
int temporal_layer_id
Definition: aomcx.h:1786
int spatial_layer_id
Definition: aomcx.h:1785
Parameter type for SVC.
Definition: aomcx.h:1795
int max_quantizers[32]
Definition: aomcx.h:1810
int number_spatial_layers
Definition: aomcx.h:1802
int layer_target_bitrate[32]
Definition: aomcx.h:1815
int framerate_factor[8]
Definition: aomcx.h:1817
int min_quantizers[32]
Definition: aomcx.h:1811
int scaling_factor_den[4]
Definition: aomcx.h:1813
int number_temporal_layers
Definition: aomcx.h:1809
int scaling_factor_num[4]
Definition: aomcx.h:1812
Parameters for setting ref frame compound prediction.
Definition: aomcx.h:1844
int use_comp_pred[3]
Definition: aomcx.h:1847
Parameters for setting ref frame config.
Definition: aomcx.h:1821
int reference[7]
Definition: aomcx.h:1837
int refresh[8]
Definition: aomcx.h:1840
int ref_idx[7]
Definition: aomcx.h:1839