Преглед изворног кода

Refine code style to follow sherpa-ncnn (#178)

Winlin пре 2 година
родитељ
комит
fd6bfb9ea3
1 измењен фајл са 87 додато и 83 уклоњено
  1. 87 83
      ffmpeg-examples/sherpa-ncnn-ffmpeg.cc

+ 87 - 83
ffmpeg-examples/sherpa-ncnn-ffmpeg.cc

@@ -75,47 +75,48 @@ extern "C" {
 }
 #endif
 
-static const char *filter_descr =
+static const char *ffmpeg_filter_descr =
     "aresample=16000,aformat=sample_fmts=s16:channel_layouts=mono";
 
-static AVFormatContext *fmt_ctx;
-static AVCodecContext *dec_ctx;
-AVFilterContext *buffersink_ctx;
-AVFilterContext *buffersrc_ctx;
-AVFilterGraph *filter_graph;
-static int32_t audio_stream_index = -1;
+static AVFormatContext *ffmpeg_fmt_ctx;
+static AVCodecContext *ffmpeg_dec_ctx;
+static AVFilterContext *ffmpeg_buffersink_ctx;
+static AVFilterContext *ffmpeg_buffersrc_ctx;
+static AVFilterGraph *ffmpeg_filter_graph;
+static int32_t ffmpeg_audio_stream_index = -1;
 
 static int32_t FFmpegOpenInputFile(const char *filename) {
-  const AVCodec *dec;
   int32_t ret;
-
-  if ((ret = avformat_open_input(&fmt_ctx, filename, NULL, NULL)) < 0) {
+  if ((ret = avformat_open_input(&ffmpeg_fmt_ctx, filename, NULL, NULL)) < 0) {
     av_log(NULL, AV_LOG_ERROR, "Cannot open input file %s\n", filename);
     return ret;
   }
 
-  if ((ret = avformat_find_stream_info(fmt_ctx, NULL)) < 0) {
+  if ((ret = avformat_find_stream_info(ffmpeg_fmt_ctx, NULL)) < 0) {
     av_log(NULL, AV_LOG_ERROR, "Cannot find stream information\n");
     return ret;
   }
 
   /* select the audio stream */
-  ret = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &dec, 0);
+  const AVCodec *dec;
+  ret =
+      av_find_best_stream(ffmpeg_fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &dec, 0);
   if (ret < 0) {
     av_log(NULL, AV_LOG_ERROR,
            "Cannot find an audio stream in the input file\n");
     return ret;
   }
-  audio_stream_index = ret;
+  ffmpeg_audio_stream_index = ret;
 
   /* create decoding context */
-  dec_ctx = avcodec_alloc_context3(dec);
-  if (!dec_ctx) return AVERROR(ENOMEM);
-  avcodec_parameters_to_context(dec_ctx,
-                                fmt_ctx->streams[audio_stream_index]->codecpar);
+  ffmpeg_dec_ctx = avcodec_alloc_context3(dec);
+  if (!ffmpeg_dec_ctx) return AVERROR(ENOMEM);
+  avcodec_parameters_to_context(
+      ffmpeg_dec_ctx,
+      ffmpeg_fmt_ctx->streams[ffmpeg_audio_stream_index]->codecpar);
 
   /* init the audio decoder */
-  if ((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
+  if ((ret = avcodec_open2(ffmpeg_dec_ctx, dec, NULL)) < 0) {
     av_log(NULL, AV_LOG_ERROR, "Cannot open audio decoder\n");
     return ret;
   }
@@ -124,73 +125,73 @@ static int32_t FFmpegOpenInputFile(const char *filename) {
 }
 
 static int32_t FFmpegInitFilters(const char *filters_descr) {
-  char args[512];
-  int32_t ret = 0;
   const AVFilter *abuffersrc = avfilter_get_by_name("abuffer");
   const AVFilter *abuffersink = avfilter_get_by_name("abuffersink");
   AVFilterInOut *outputs = avfilter_inout_alloc();
   AVFilterInOut *inputs = avfilter_inout_alloc();
-  static const enum AVSampleFormat out_sample_fmts[] = {AV_SAMPLE_FMT_S16,
-                                                        AV_SAMPLE_FMT_NONE};
-  static const int32_t out_sample_rates[] = {16000, -1};
-  const AVFilterLink *outlink;
-  AVRational time_base = fmt_ctx->streams[audio_stream_index]->time_base;
+  AVRational time_base =
+      ffmpeg_fmt_ctx->streams[ffmpeg_audio_stream_index]->time_base;
 
-  filter_graph = avfilter_graph_alloc();
-  if (!outputs || !inputs || !filter_graph) {
+  int32_t ret;
+  ffmpeg_filter_graph = avfilter_graph_alloc();
+  if (!outputs || !inputs || !ffmpeg_filter_graph) {
     ret = AVERROR(ENOMEM);
     goto end;
   }
 
   /* buffer audio source: the decoded frames from the decoder will be inserted
    * here. */
-  if (dec_ctx->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC)
-    av_channel_layout_default(&dec_ctx->ch_layout,
-                              dec_ctx->ch_layout.nb_channels);
+  if (ffmpeg_dec_ctx->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC)
+    av_channel_layout_default(&ffmpeg_dec_ctx->ch_layout,
+                              ffmpeg_dec_ctx->ch_layout.nb_channels);
+  char args[512];
   ret = snprintf(args, sizeof(args),
                  "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=",
-                 time_base.num, time_base.den, dec_ctx->sample_rate,
-                 av_get_sample_fmt_name(dec_ctx->sample_fmt));
-  av_channel_layout_describe(&dec_ctx->ch_layout, args + ret,
+                 time_base.num, time_base.den, ffmpeg_dec_ctx->sample_rate,
+                 av_get_sample_fmt_name(ffmpeg_dec_ctx->sample_fmt));
+  av_channel_layout_describe(&ffmpeg_dec_ctx->ch_layout, args + ret,
                              sizeof(args) - ret);
-  ret = avfilter_graph_create_filter(&buffersrc_ctx, abuffersrc, "in", args,
-                                     NULL, filter_graph);
+  ret = avfilter_graph_create_filter(&ffmpeg_buffersrc_ctx, abuffersrc, "in",
+                                     args, NULL, ffmpeg_filter_graph);
   if (ret < 0) {
     av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer source\n");
     goto end;
   }
 
   /* buffer audio sink: to terminate the filter chain. */
-  ret = avfilter_graph_create_filter(&buffersink_ctx, abuffersink, "out", NULL,
-                                     NULL, filter_graph);
+  ret = avfilter_graph_create_filter(&ffmpeg_buffersink_ctx, abuffersink, "out",
+                                     NULL, NULL, ffmpeg_filter_graph);
   if (ret < 0) {
     av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer sink\n");
     goto end;
   }
 
-  ret = av_opt_set_int_list(buffersink_ctx, "sample_fmts", out_sample_fmts, -1,
-                            AV_OPT_SEARCH_CHILDREN);
+  static const enum AVSampleFormat out_sample_fmts[] = {AV_SAMPLE_FMT_S16,
+                                                        AV_SAMPLE_FMT_NONE};
+  ret = av_opt_set_int_list(ffmpeg_buffersink_ctx, "sample_fmts",
+                            out_sample_fmts, -1, AV_OPT_SEARCH_CHILDREN);
   if (ret < 0) {
     av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n");
     goto end;
   }
 
-  ret =
-      av_opt_set(buffersink_ctx, "ch_layouts", "mono", AV_OPT_SEARCH_CHILDREN);
+  ret = av_opt_set(ffmpeg_buffersink_ctx, "ch_layouts", "mono",
+                   AV_OPT_SEARCH_CHILDREN);
   if (ret < 0) {
     av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n");
     goto end;
   }
 
-  ret = av_opt_set_int_list(buffersink_ctx, "sample_rates", out_sample_rates,
-                            -1, AV_OPT_SEARCH_CHILDREN);
+  static const int32_t out_sample_rates[] = {16000, -1};
+  ret = av_opt_set_int_list(ffmpeg_buffersink_ctx, "sample_rates",
+                            out_sample_rates, -1, AV_OPT_SEARCH_CHILDREN);
   if (ret < 0) {
     av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n");
     goto end;
   }
 
   /*
-   * Set the endpoints for the filter graph. The filter_graph will
+   * Set the endpoints for the filter graph. The ffmpeg_filter_graph will
    * be linked to the graph described by filters_descr.
    */
 
@@ -201,7 +202,7 @@ static int32_t FFmpegInitFilters(const char *filters_descr) {
    * default.
    */
   outputs->name = av_strdup("in");
-  outputs->filter_ctx = buffersrc_ctx;
+  outputs->filter_ctx = ffmpeg_buffersrc_ctx;
   outputs->pad_idx = 0;
   outputs->next = NULL;
 
@@ -212,19 +213,20 @@ static int32_t FFmpegInitFilters(const char *filters_descr) {
    * default.
    */
   inputs->name = av_strdup("out");
-  inputs->filter_ctx = buffersink_ctx;
+  inputs->filter_ctx = ffmpeg_buffersink_ctx;
   inputs->pad_idx = 0;
   inputs->next = NULL;
 
-  if ((ret = avfilter_graph_parse_ptr(filter_graph, filters_descr, &inputs,
-                                      &outputs, NULL)) < 0)
+  if ((ret = avfilter_graph_parse_ptr(ffmpeg_filter_graph, filters_descr,
+                                      &inputs, &outputs, NULL)) < 0)
     goto end;
 
-  if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) goto end;
+  if ((ret = avfilter_graph_config(ffmpeg_filter_graph, NULL)) < 0) goto end;
 
   /* Print summary of the sink buffer
    * Note: args buffer is reused to store channel layout string */
-  outlink = buffersink_ctx->inputs[0];
+  const AVFilterLink *outlink;
+  outlink = ffmpeg_buffersink_ctx->inputs[0];
   av_channel_layout_describe(&outlink->ch_layout, args, sizeof(args));
   fprintf(
       stdout,
@@ -242,18 +244,16 @@ end:
   return ret;
 }
 
-static void FFmpegDecodeFrame(const AVFrame *frame,
-                              const sherpa_ncnn::Recognizer &recognizer,
-                              sherpa_ncnn::Stream *s,
-                              sherpa_ncnn::Display *display,
-                              std::string *last_text, int32_t *segment_index,
-                              int32_t *zero_samples) {
+static void FFmpegOnDecodedFrame(const AVFrame *frame,
+                                 const sherpa_ncnn::Recognizer &recognizer,
+                                 sherpa_ncnn::Stream *s,
+                                 sherpa_ncnn::Display *display,
+                                 std::string *last_text, int32_t *segment_index,
+                                 int32_t *zero_samples) {
   // TODO: FIXME: Can we directly consume frame by s without buffer?
 #define N 3200  // 0.2 s. Sample rate is fixed to 16 kHz
   static float samples[N];
   static int32_t nb_samples = 0;
-  const int16_t *p = (int16_t *)frame->data[0];
-
   if (frame->nb_samples + nb_samples >= N) {
     s->AcceptWaveform(16000, samples, nb_samples);
 
@@ -284,6 +284,7 @@ static void FFmpegDecodeFrame(const AVFrame *frame,
     nb_samples = 0;
   }
 
+  const int16_t *p = (int16_t *)frame->data[0];
   for (int32_t i = 0; i < frame->nb_samples; i++) {
     if (p[i] == 0) {
       (*zero_samples)++;
@@ -604,8 +605,9 @@ for a list of pre-trained models to download.
   fprintf(stdout, "Event:FFmpeg: Open input ok, %s\n", input_url.c_str());
   fflush(stdout);
 
-  if ((ret = FFmpegInitFilters(filter_descr)) < 0) {
-    fprintf(stderr, "Init filters %s failed, r0=%d\n", filter_descr, ret);
+  if ((ret = FFmpegInitFilters(ffmpeg_filter_descr)) < 0) {
+    fprintf(stderr, "Init filters %s failed, r0=%d\n", ffmpeg_filter_descr,
+            ret);
     exit(1);
   }
 
@@ -617,7 +619,7 @@ for a list of pre-trained models to download.
   int32_t segment_index = 0, zero_samples = 0, asd_segment = 0;
   std::unique_ptr<sherpa_ncnn::Display> display = CreateDisplay();
   while (1) {
-    if ((ret = av_read_frame(fmt_ctx, packet)) < 0) {
+    if ((ret = av_read_frame(ffmpeg_fmt_ctx, packet)) < 0) {
       break;
     }
 
@@ -640,8 +642,8 @@ for a list of pre-trained models to download.
       zero_samples = 0;
     }
 
-    if (packet->stream_index == audio_stream_index) {
-      ret = avcodec_send_packet(dec_ctx, packet);
+    if (packet->stream_index == ffmpeg_audio_stream_index) {
+      ret = avcodec_send_packet(ffmpeg_dec_ctx, packet);
       if (ret < 0) {
         av_log(NULL, AV_LOG_ERROR,
                "Error while sending a packet to the decoder\n");
@@ -649,7 +651,7 @@ for a list of pre-trained models to download.
       }
 
       while (ret >= 0) {
-        ret = avcodec_receive_frame(dec_ctx, frame);
+        ret = avcodec_receive_frame(ffmpeg_dec_ctx, frame);
         if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
           break;
         } else if (ret < 0) {
@@ -660,7 +662,7 @@ for a list of pre-trained models to download.
 
         if (ret >= 0) {
           /* push the audio data from decoded frame into the filtergraph */
-          if (av_buffersrc_add_frame_flags(buffersrc_ctx, frame,
+          if (av_buffersrc_add_frame_flags(ffmpeg_buffersrc_ctx, frame,
                                            AV_BUFFERSRC_FLAG_KEEP_REF) < 0) {
             av_log(NULL, AV_LOG_ERROR,
                    "Error while feeding the audio filtergraph\n");
@@ -669,7 +671,7 @@ for a list of pre-trained models to download.
 
           /* pull filtered audio from the filtergraph */
           while (1) {
-            ret = av_buffersink_get_frame(buffersink_ctx, filt_frame);
+            ret = av_buffersink_get_frame(ffmpeg_buffersink_ctx, filt_frame);
             if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
               break;
             }
@@ -677,8 +679,8 @@ for a list of pre-trained models to download.
               fprintf(stderr, "Error get frame, ret=%d\n", ret);
               exit(1);
             }
-            FFmpegDecodeFrame(filt_frame, recognizer, s.get(), display.get(),
-                              &last_text, &segment_index, &zero_samples);
+            FFmpegOnDecodedFrame(filt_frame, recognizer, s.get(), display.get(),
+                                 &last_text, &segment_index, &zero_samples);
             av_frame_unref(filt_frame);
           }
           av_frame_unref(frame);
@@ -689,26 +691,28 @@ for a list of pre-trained models to download.
   }
 
   // Add some tail padding
-  float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
-  s->AcceptWaveform(16000, tail_paddings, 4800);
+  if (1) {
+    float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
+    s->AcceptWaveform(16000, tail_paddings, 4800);
 
-  s->InputFinished();
+    s->InputFinished();
 
-  while (recognizer.IsReady(s.get())) {
-    recognizer.DecodeStream(s.get());
-  }
+    while (recognizer.IsReady(s.get())) {
+      recognizer.DecodeStream(s.get());
+    }
 
-  auto text = recognizer.GetResult(s.get()).text;
-  if (!text.empty() && last_text != text) {
-    last_text = text;
-    std::transform(text.begin(), text.end(), text.begin(),
-                   [](auto c) { return std::tolower(c); });
-    display->Print(segment_index, text);
+    auto text = recognizer.GetResult(s.get()).text;
+    if (!text.empty() && last_text != text) {
+      last_text = text;
+      std::transform(text.begin(), text.end(), text.begin(),
+                     [](auto c) { return std::tolower(c); });
+      display->Print(segment_index, text);
+    }
   }
 
-  avfilter_graph_free(&filter_graph);
-  avcodec_free_context(&dec_ctx);
-  avformat_close_input(&fmt_ctx);
+  avfilter_graph_free(&ffmpeg_filter_graph);
+  avcodec_free_context(&ffmpeg_dec_ctx);
+  avformat_close_input(&ffmpeg_fmt_ctx);
   av_packet_free(&packet);
   av_frame_free(&frame);
   av_frame_free(&filt_frame);