habulaj committed on
Commit
51397fc
·
verified ·
1 Parent(s): 513121f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -12
app.py CHANGED
@@ -813,30 +813,59 @@ async def generate_subtitle(
813
  log(f"✅ SRT Base gerado ({len(srt_base)} chars)")
814
 
815
  # 2. Traduzir com Gemini
816
- log(f"🧠 [Fase 2] Enviando para Gemini ({requested_model}) para tradução PT-BR...")
817
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
818
  context_instruction = ""
819
  if context:
820
  context_instruction = f"\n6. **USER INSTRUCTIONS**: {context}\n"
821
 
822
- prompt = f"""Task: Translate the following SRT subtitles to Brazilian Portuguese (PT-BR).
823
 
824
- CRITICAL RULES:
825
- 1. **TIMESTAMPS**: Do NOT touch the timestamps. Keep them exactly as they are.
826
- 2. **FORMATTING**: Do NOT change the line breaks or the number of lines. Keep the exact visual structure of the original SRT.
827
- 3. **CONTEXT**: Translate naturally for a Brazilian audience.
828
- 4. **PUNCTUATION**: Adjust punctuation to fit the context best (add question marks, exclamation marks, or periods where semantically appropriate).
829
- 5. **OUTPUT**: Return ONLY the translated SRT content. No extra text.{context_instruction}
830
 
831
- ORIGINAL SRT:
 
 
 
 
 
 
 
 
832
  {srt_base}
833
  """
834
 
835
  # Helper function for retry logic
836
- async def ask_with_retry(chatbot, prompt):
837
  max_retries = 3
838
  for attempt in range(max_retries):
839
- response = await chatbot.ask(prompt)
 
 
 
 
 
 
 
840
  if response.get("error"):
841
  content = response.get("content", "")
842
  if "Failed to parse response body" in content and attempt < max_retries - 1:
@@ -847,8 +876,15 @@ ORIGINAL SRT:
847
  return response
848
  return response
849
 
850
- response_gemini = await ask_with_retry(selected_chatbot, prompt)
851
 
 
 
 
 
 
 
 
852
  if response_gemini.get("error"):
853
  raise HTTPException(
854
  status_code=500,
 
813
  log(f"✅ SRT Base gerado ({len(srt_base)} chars)")
814
 
815
  # 2. Traduzir com Gemini
816
+ log(f"🧠 [Fase 2] Enviando para Gemini ({requested_model}) para tradução PT-BR (com contexto de vídeo)...")
817
 
818
+ # Baixar arquivo para enviar ao Gemini
819
+ log(f"⬇️ Baixando mídia para contexto...")
820
+ response = download_file_with_retry(file)
821
+ content_type = response.headers.get('content-type', '').lower()
822
+
823
+ file_extension = '.mp4'
824
+ media_type = 'video'
825
+ if 'audio' in content_type:
826
+ file_extension = '.mp3'
827
+ media_type = 'audio'
828
+
829
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=file_extension)
830
+ for chunk in response.iter_content(chunk_size=8192):
831
+ if chunk:
832
+ temp_file.write(chunk)
833
+ temp_file.close()
834
+ log(f"✅ Mídia baixada: {temp_file.name}")
835
+
836
  context_instruction = ""
837
  if context:
838
  context_instruction = f"\n6. **USER INSTRUCTIONS**: {context}\n"
839
 
840
+ prompt = f"""Task: Translate and Correct the SRT subtitles to Brazilian Portuguese (PT-BR).
841
 
842
+ SOURCE MATERIAL: I have attached the video/audio file. Use it as the PRIMARY source of truth for context, speaker identification, and meaning.
843
+ INPUT SRT: I have provided a draft SRT generated by an automated transcription service.
 
 
 
 
844
 
845
+ CRITICAL INSTRUCTIONS:
846
+ 1. **CORRECTION**: The input SRT is a draft. It may have wrong words or misinterpretations. **You MUST correct the text based on the actual audio.**
847
+ 2. **TIMESTAMPS**: The timestamps in the input SRT are generally correct. **Keep them as much as possible**, only adjust if they are clearly wrong / desynchronized.
848
+ 3. **FORMATTING**: Do NOT change the line breaks or the number of lines. The visual structure (2 lines max, balanced) is already optimized.
849
+ 4. **CONTEXT**: Translate naturally for a Brazilian audience.
850
+ 5. **PUNCTUATION**: Adjust punctuation to fit the context best.
851
+ 6. **OUTPUT**: Return ONLY the corrected and translated SRT content. No extra text.{context_instruction}
852
+
853
+ DRAFT SRT:
854
  {srt_base}
855
  """
856
 
857
  # Helper function for retry logic
858
+ async def ask_with_retry(chatbot, prompt, media_file, media_type):
859
  max_retries = 3
860
  for attempt in range(max_retries):
861
+ # Passar video ou audio
862
+ kwargs = {}
863
+ if media_type == 'video':
864
+ kwargs['video'] = media_file
865
+ else:
866
+ kwargs['audio'] = media_file
867
+
868
+ response = await chatbot.ask(prompt, **kwargs)
869
  if response.get("error"):
870
  content = response.get("content", "")
871
  if "Failed to parse response body" in content and attempt < max_retries - 1:
 
876
  return response
877
  return response
878
 
879
+ response_gemini = await ask_with_retry(selected_chatbot, prompt, temp_file.name, media_type)
880
 
881
+ # Limpar arquivo temporário
882
+ if os.path.exists(temp_file.name):
883
+ try:
884
+ os.unlink(temp_file.name)
885
+ except:
886
+ pass
887
+
888
  if response_gemini.get("error"):
889
  raise HTTPException(
890
  status_code=500,