habulaj committed on
Commit
d477136
·
verified ·
1 Parent(s): 5e4660f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -34
app.py CHANGED
@@ -740,35 +740,65 @@ async def get_groq_srt_base(url: str, language: Optional[str] = None, temperatur
740
 
741
  processed_audio_url = None
742
  processed_filename = None
 
743
 
744
  try:
745
- # 2. Pré-processar (Remover ruído, filtrar voz, etc)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
746
  print(f"🔊 [Groq] Pré-processando áudio (has_bg_music={has_bg_music})...")
747
- # O process_audio cria um arquivo novo com .processed.mp3
748
  processed_file_path = process_audio_for_transcription(filepath, has_bg_music=has_bg_music, time_start=time_start, time_end=time_end)
749
 
750
- # Se processou, o caminho mudou. Vamos ver.
751
  if processed_file_path != filepath:
752
- # Vamos mover/renomear para garantir que esteja em static com nome limpo?
753
- # Na verdade o process_utils salva como <input>.processed.mp3
754
- # Então já está em static/
755
  pass
756
 
757
- # Gerar URL pública (assumindo que o host é acessível)
758
- # Como não sabemos o domínio exato, vamos retornar caminho relativo /static/...
759
- # O cliente pode compor. Ou tentamos pegar do request se tivessemos acesso.
760
  processed_filename = os.path.basename(processed_file_path)
761
  processed_audio_url = f"/static/{processed_filename}"
762
 
763
- # 3. Enviar para Groq
764
- groq_url = "https://api.groq.com/openai/v1/audio/transcriptions"
765
- headers = {
766
- "Authorization": f"Bearer {GROQ_API_KEY}"
767
- }
768
-
769
- # Abrir arquivo processado
770
  with open(processed_file_path, "rb") as f:
771
- # Usar lista de tuplas para suportar múltiplos valores de timestamp_granularities[]
772
  files = [
773
  ("model", (None, "whisper-large-v3")),
774
  ("file", ("audio.mp3", f, "audio/mpeg")),
@@ -788,12 +818,9 @@ async def get_groq_srt_base(url: str, language: Optional[str] = None, temperatur
788
 
789
  for attempt in range(max_retries):
790
  try:
791
- # Precisamos resetar o ponteiro do arquivo se for retry?
792
- # O requests deve ler tudo. Se falhar, na proxima tentativa, o 'f' ja foi lido.
793
- # Mover seek(0) é importante.
794
  f.seek(0)
795
 
796
- response_groq = requests.post(groq_url, headers=headers, files=files, timeout=300)
797
 
798
  if response_groq.status_code == 200:
799
  result = response_groq.json()
@@ -819,21 +846,16 @@ async def get_groq_srt_base(url: str, language: Optional[str] = None, temperatur
819
  raise HTTPException(status_code=500, detail=f"Erro conexão Groq: {e}")
820
 
821
  finally:
822
- # Cleanup?
823
- # O usuário pediu para "hospedar em arquivo temporario... mande a url".
824
- # Então NÃO deletamos o arquivo processado imediatamente.
825
- # Mas deletamos o original baixado para economizar espaço?
826
  if filepath and os.path.exists(filepath) and filepath != processed_file_path:
827
  try: os.unlink(filepath)
828
  except: pass
829
 
830
  # Converter para SRT
831
  srt_base = groq_json_to_srt(result)
832
- word_level_text = groq_words_to_text(result)
833
- # srt_filtered = apply_netflix_style_filter(srt_word)
834
- # REMOVIDO FILTRO NETFLIX - Retornando raw Whisper segments
835
 
836
- return srt_base, srt_base, processed_audio_url, word_level_text
837
 
838
  @app.post("/subtitle/groq")
839
  async def generate_subtitle_groq(request: GroqRequest):
@@ -842,7 +864,7 @@ async def generate_subtitle_groq(request: GroqRequest):
842
  Agora envia a URL diretamente para a API do Groq e aplica filtro Netflix.
843
  """
844
  try:
845
- srt_filtered, srt_word, processed_audio_url, _word_level = await get_groq_srt_base(
846
  url=request.url,
847
  language=request.language,
848
  temperature=request.temperature,
@@ -892,7 +914,7 @@ async def generate_subtitle(request: GeminiSubtitleRequest):
892
  # 1. Obter SRT base + Caminho do áudio processado
893
  print("🚀 Iniciando pipeline completo de legendagem Gemini...")
894
 
895
- srt_filtered, srt_word, processed_audio_url, word_level_text = await get_groq_srt_base(
896
  url=request.url,
897
  language="en",
898
  temperature=0.4,
@@ -1012,8 +1034,8 @@ INSTRUÇÕES/CONTEXTO DO USUÁRIO (OPCIONAL): {processed_context}
1012
  --- LEGENDA BASE (WHISPER) ---
1013
  {srt_filtered}
1014
 
1015
- --- TIMESTAMPS POR PALAVRA (WORD-LEVEL) ---
1016
- {word_level_text}
1017
  """
1018
 
1019
  # 4. Enviar para Gemini
@@ -1040,7 +1062,8 @@ INSTRUÇÕES/CONTEXTO DO USUÁRIO (OPCIONAL): {processed_context}
1040
  return JSONResponse(content={
1041
  "srt": cleaned_srt,
1042
  "original_srt": srt_filtered,
1043
- "srt_word_level": word_level_text,
 
1044
  "used_audio_processed": True
1045
  })
1046
 
 
740
 
741
  processed_audio_url = None
742
  processed_filename = None
743
+ raw_word_level_text = ""
744
 
745
  try:
746
+ # 2a. Chamada Groq com áudio ORIGINAL (raw) para word-level timestamps fiéis ao vídeo
747
+ groq_url = "https://api.groq.com/openai/v1/audio/transcriptions"
748
+ groq_headers = {
749
+ "Authorization": f"Bearer {GROQ_API_KEY}"
750
+ }
751
+
752
+ print(f"🧠 [Groq] Enviando ÁUDIO ORIGINAL para word-level timestamps...")
753
+ with open(filepath, "rb") as f_raw:
754
+ raw_files = [
755
+ ("model", (None, "whisper-large-v3")),
756
+ ("file", ("audio.mp3", f_raw, "audio/mpeg")),
757
+ ("temperature", (None, str(temperature))),
758
+ ("response_format", (None, "verbose_json")),
759
+ ("timestamp_granularities[]", (None, "word"))
760
+ ]
761
+ if language and language in GROQ_SUPPORTED_LANGUAGES:
762
+ raw_files.append(("language", (None, language)))
763
+
764
+ raw_result = None
765
+ for attempt in range(3):
766
+ try:
767
+ f_raw.seek(0)
768
+ resp_raw = requests.post(groq_url, headers=groq_headers, files=raw_files, timeout=300)
769
+ if resp_raw.status_code == 200:
770
+ raw_result = resp_raw.json()
771
+ break
772
+ if resp_raw.status_code >= 500 and attempt < 2:
773
+ await asyncio.sleep(2 * (attempt + 1))
774
+ continue
775
+ print(f"⚠️ [Groq Raw] Erro {resp_raw.status_code}: {resp_raw.text[:200]}")
776
+ break
777
+ except requests.RequestException as e:
778
+ if attempt < 2:
779
+ await asyncio.sleep(2)
780
+ continue
781
+ print(f"⚠️ [Groq Raw] Erro conexão: {e}")
782
+ break
783
+
784
+ if raw_result:
785
+ raw_word_level_text = groq_words_to_text(raw_result)
786
+ print(f"✅ [Groq Raw] Word-level obtido: {len(raw_result.get('words') or [])} palavras")
787
+ else:
788
+ print(f"⚠️ [Groq Raw] Falha ao obter word-level do áudio original, continuando...")
789
+
790
+ # 2b. Pré-processar (Remover ruído, filtrar voz, etc)
791
  print(f"🔊 [Groq] Pré-processando áudio (has_bg_music={has_bg_music})...")
 
792
  processed_file_path = process_audio_for_transcription(filepath, has_bg_music=has_bg_music, time_start=time_start, time_end=time_end)
793
 
 
794
  if processed_file_path != filepath:
 
 
 
795
  pass
796
 
 
 
 
797
  processed_filename = os.path.basename(processed_file_path)
798
  processed_audio_url = f"/static/{processed_filename}"
799
 
800
+ # 3. Enviar áudio PROCESSADO para Groq (segments + word-level)
 
 
 
 
 
 
801
  with open(processed_file_path, "rb") as f:
 
802
  files = [
803
  ("model", (None, "whisper-large-v3")),
804
  ("file", ("audio.mp3", f, "audio/mpeg")),
 
818
 
819
  for attempt in range(max_retries):
820
  try:
 
 
 
821
  f.seek(0)
822
 
823
+ response_groq = requests.post(groq_url, headers=groq_headers, files=files, timeout=300)
824
 
825
  if response_groq.status_code == 200:
826
  result = response_groq.json()
 
846
  raise HTTPException(status_code=500, detail=f"Erro conexão Groq: {e}")
847
 
848
  finally:
849
+ # Cleanup do arquivo original
 
 
 
850
  if filepath and os.path.exists(filepath) and filepath != processed_file_path:
851
  try: os.unlink(filepath)
852
  except: pass
853
 
854
  # Converter para SRT
855
  srt_base = groq_json_to_srt(result)
856
+ word_level_processed = groq_words_to_text(result)
 
 
857
 
858
+ return srt_base, srt_base, processed_audio_url, raw_word_level_text, word_level_processed
859
 
860
  @app.post("/subtitle/groq")
861
  async def generate_subtitle_groq(request: GroqRequest):
 
864
  Agora envia a URL diretamente para a API do Groq e aplica filtro Netflix.
865
  """
866
  try:
867
+ srt_filtered, srt_word, processed_audio_url, _raw_wl, _proc_wl = await get_groq_srt_base(
868
  url=request.url,
869
  language=request.language,
870
  temperature=request.temperature,
 
914
  # 1. Obter SRT base + Caminho do áudio processado
915
  print("🚀 Iniciando pipeline completo de legendagem Gemini...")
916
 
917
+ srt_filtered, srt_word, processed_audio_url, raw_word_level, processed_word_level = await get_groq_srt_base(
918
  url=request.url,
919
  language="en",
920
  temperature=0.4,
 
1034
  --- LEGENDA BASE (WHISPER) ---
1035
  {srt_filtered}
1036
 
1037
+ --- TIMESTAMPS POR PALAVRA (WORD-LEVEL, ÁUDIO ORIGINAL) ---
1038
+ {raw_word_level}
1039
  """
1040
 
1041
  # 4. Enviar para Gemini
 
1062
  return JSONResponse(content={
1063
  "srt": cleaned_srt,
1064
  "original_srt": srt_filtered,
1065
+ "srt_word_level_raw": raw_word_level,
1066
+ "srt_word_level_processed": processed_word_level,
1067
  "used_audio_processed": True
1068
  })
1069