Update app.py
Browse files
app.py
CHANGED
|
@@ -813,30 +813,59 @@ async def generate_subtitle(
|
|
| 813 |
log(f"✅ SRT Base gerado ({len(srt_base)} chars)")
|
| 814 |
|
| 815 |
# 2. Traduzir com Gemini
|
| 816 |
-
log(f"🧠 [Fase 2] Enviando para Gemini ({requested_model}) para tradução PT-BR...")
|
| 817 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
context_instruction = ""
|
| 819 |
if context:
|
| 820 |
context_instruction = f"\n6. **USER INSTRUCTIONS**: {context}\n"
|
| 821 |
|
| 822 |
-
prompt = f"""Task: Translate the
|
| 823 |
|
| 824 |
-
|
| 825 |
-
|
| 826 |
-
2. **FORMATTING**: Do NOT change the line breaks or the number of lines. Keep the exact visual structure of the original SRT.
|
| 827 |
-
3. **CONTEXT**: Translate naturally for a Brazilian audience.
|
| 828 |
-
4. **PUNCTUATION**: Adjust punctuation to fit the context best (add question marks, exclamation marks, or periods where semantically appropriate).
|
| 829 |
-
5. **OUTPUT**: Return ONLY the translated SRT content. No extra text.{context_instruction}
|
| 830 |
|
| 831 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 832 |
{srt_base}
|
| 833 |
"""
|
| 834 |
|
| 835 |
# Helper function for retry logic
|
| 836 |
-
async def ask_with_retry(chatbot, prompt):
|
| 837 |
max_retries = 3
|
| 838 |
for attempt in range(max_retries):
|
| 839 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 840 |
if response.get("error"):
|
| 841 |
content = response.get("content", "")
|
| 842 |
if "Failed to parse response body" in content and attempt < max_retries - 1:
|
|
@@ -847,8 +876,15 @@ ORIGINAL SRT:
|
|
| 847 |
return response
|
| 848 |
return response
|
| 849 |
|
| 850 |
-
response_gemini = await ask_with_retry(selected_chatbot, prompt)
|
| 851 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 852 |
if response_gemini.get("error"):
|
| 853 |
raise HTTPException(
|
| 854 |
status_code=500,
|
|
|
|
| 813 |
log(f"✅ SRT Base gerado ({len(srt_base)} chars)")
|
| 814 |
|
| 815 |
# 2. Traduzir com Gemini
|
| 816 |
+
log(f"🧠 [Fase 2] Enviando para Gemini ({requested_model}) para tradução PT-BR (com contexto de vídeo)...")
|
| 817 |
|
| 818 |
+
# Baixar arquivo para enviar ao Gemini
|
| 819 |
+
log(f"⬇️ Baixando mídia para contexto...")
|
| 820 |
+
response = download_file_with_retry(file)
|
| 821 |
+
content_type = response.headers.get('content-type', '').lower()
|
| 822 |
+
|
| 823 |
+
file_extension = '.mp4'
|
| 824 |
+
media_type = 'video'
|
| 825 |
+
if 'audio' in content_type:
|
| 826 |
+
file_extension = '.mp3'
|
| 827 |
+
media_type = 'audio'
|
| 828 |
+
|
| 829 |
+
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=file_extension)
|
| 830 |
+
for chunk in response.iter_content(chunk_size=8192):
|
| 831 |
+
if chunk:
|
| 832 |
+
temp_file.write(chunk)
|
| 833 |
+
temp_file.close()
|
| 834 |
+
log(f"✅ Mídia baixada: {temp_file.name}")
|
| 835 |
+
|
| 836 |
context_instruction = ""
|
| 837 |
if context:
|
| 838 |
context_instruction = f"\n6. **USER INSTRUCTIONS**: {context}\n"
|
| 839 |
|
| 840 |
+
prompt = f"""Task: Translate and Correct the SRT subtitles to Brazilian Portuguese (PT-BR).
|
| 841 |
|
| 842 |
+
SOURCE MATERIAL: I have attached the video/audio file. Use it as the PRIMARY source of truth for context, speaker identification, and meaning.
|
| 843 |
+
INPUT SRT: I have provided a draft SRT generated by an automated transcription service.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 844 |
|
| 845 |
+
CRITICAL INSTRUCTIONS:
|
| 846 |
+
1. **CORRECTION**: The input SRT is a draft. It may have wrong words or misinterpretations. **You MUST correct the text based on the actual audio.**
|
| 847 |
+
2. **TIMESTAMPS**: The timestamps in the input SRT are generally correct. **Keep them as much as possible**, only adjust if they are clearly wrong / desynchronized.
|
| 848 |
+
3. **FORMATTING**: Do NOT change the line breaks or the number of lines. The visual structure (2 lines max, balanced) is already optimized.
|
| 849 |
+
4. **CONTEXT**: Translate naturally for a Brazilian audience.
|
| 850 |
+
5. **PUNCTUATION**: Adjust punctuation to fit the context best.
|
| 851 |
+
6. **OUTPUT**: Return ONLY the corrected and translated SRT content. No extra text.{context_instruction}
|
| 852 |
+
|
| 853 |
+
DRAFT SRT:
|
| 854 |
{srt_base}
|
| 855 |
"""
|
| 856 |
|
| 857 |
# Helper function for retry logic
|
| 858 |
+
async def ask_with_retry(chatbot, prompt, media_file, media_type):
|
| 859 |
max_retries = 3
|
| 860 |
for attempt in range(max_retries):
|
| 861 |
+
# Passar video ou audio
|
| 862 |
+
kwargs = {}
|
| 863 |
+
if media_type == 'video':
|
| 864 |
+
kwargs['video'] = media_file
|
| 865 |
+
else:
|
| 866 |
+
kwargs['audio'] = media_file
|
| 867 |
+
|
| 868 |
+
response = await chatbot.ask(prompt, **kwargs)
|
| 869 |
if response.get("error"):
|
| 870 |
content = response.get("content", "")
|
| 871 |
if "Failed to parse response body" in content and attempt < max_retries - 1:
|
|
|
|
| 876 |
return response
|
| 877 |
return response
|
| 878 |
|
| 879 |
+
response_gemini = await ask_with_retry(selected_chatbot, prompt, temp_file.name, media_type)
|
| 880 |
|
| 881 |
+
# Limpar arquivo temporário
|
| 882 |
+
if os.path.exists(temp_file.name):
|
| 883 |
+
try:
|
| 884 |
+
os.unlink(temp_file.name)
|
| 885 |
+
except:
|
| 886 |
+
pass
|
| 887 |
+
|
| 888 |
if response_gemini.get("error"):
|
| 889 |
raise HTTPException(
|
| 890 |
status_code=500,
|