Spaces:

habulaj
/

subapi

Running

App Files Community

habulaj committed 29 days ago

Commit

51397fc

verified ·

1 Parent(s): 513121f

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -12

app.py CHANGED Viewed

@@ -813,30 +813,59 @@ async def generate_subtitle(
         log(f"✅ SRT Base gerado ({len(srt_base)} chars)")
         # 2. Traduzir com Gemini
-        log(f"🧠 [Fase 2] Enviando para Gemini ({requested_model}) para tradução PT-BR...")
         context_instruction = ""
         if context:
             context_instruction = f"\n6.  **USER INSTRUCTIONS**: {context}\n"
-        prompt = f"""Task: Translate the following SRT subtitles to Brazilian Portuguese (PT-BR).
-CRITICAL RULES:
-1.  **TIMESTAMPS**: Do NOT touch the timestamps. Keep them exactly as they are.
-2.  **FORMATTING**: Do NOT change the line breaks or the number of lines. Keep the exact visual structure of the original SRT.
-3.  **CONTEXT**: Translate naturally for a Brazilian audience.
-4.  **PUNCTUATION**: Adjust punctuation to fit the context best (add question marks, exclamation marks, or periods where semantically appropriate).
-5.  **OUTPUT**: Return ONLY the translated SRT content. No extra text.{context_instruction}
-ORIGINAL SRT:
 {srt_base}
 """
         # Helper function for retry logic
-        async def ask_with_retry(chatbot, prompt):
             max_retries = 3
             for attempt in range(max_retries):
-                response = await chatbot.ask(prompt)
                 if response.get("error"):
                     content = response.get("content", "")
                     if "Failed to parse response body" in content and attempt < max_retries - 1:
@@ -847,8 +876,15 @@ ORIGINAL SRT:
                 return response
             return response
-        response_gemini = await ask_with_retry(selected_chatbot, prompt)
         if response_gemini.get("error"):
              raise HTTPException(
                 status_code=500,

         log(f"✅ SRT Base gerado ({len(srt_base)} chars)")
         # 2. Traduzir com Gemini
+        log(f"🧠 [Fase 2] Enviando para Gemini ({requested_model}) para tradução PT-BR (com contexto de vídeo)...")
+        # Baixar arquivo para enviar ao Gemini
+        log(f"⬇️ Baixando mídia para contexto...")
+        response = download_file_with_retry(file)
+        content_type = response.headers.get('content-type', '').lower()
+        file_extension = '.mp4'
+        media_type = 'video'
+        if 'audio' in content_type:
+            file_extension = '.mp3'
+            media_type = 'audio'
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=file_extension)
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                temp_file.write(chunk)
+        temp_file.close()
+        log(f"✅ Mídia baixada: {temp_file.name}")
         context_instruction = ""
         if context:
             context_instruction = f"\n6.  **USER INSTRUCTIONS**: {context}\n"
+        prompt = f"""Task: Translate and Correct the SRT subtitles to Brazilian Portuguese (PT-BR).
+SOURCE MATERIAL: I have attached the video/audio file. Use it as the PRIMARY source of truth for context, speaker identification, and meaning.
+INPUT SRT: I have provided a draft SRT generated by an automated transcription service.
+CRITICAL INSTRUCTIONS:
+1.  **CORRECTION**: The input SRT is a draft. It may have wrong words or misinterpretations. **You MUST correct the text based on the actual audio.**
+2.  **TIMESTAMPS**: The timestamps in the input SRT are generally correct. **Keep them as much as possible**, only adjust if they are clearly wrong / desynchronized.
+3.  **FORMATTING**: Do NOT change the line breaks or the number of lines. The visual structure (2 lines max, balanced) is already optimized.
+4.  **CONTEXT**: Translate naturally for a Brazilian audience.
+5.  **PUNCTUATION**: Adjust punctuation to fit the context best.
+6.  **OUTPUT**: Return ONLY the corrected and translated SRT content. No extra text.{context_instruction}
+DRAFT SRT:
 {srt_base}
 """
         # Helper function for retry logic
+        async def ask_with_retry(chatbot, prompt, media_file, media_type):
             max_retries = 3
             for attempt in range(max_retries):
+                # Passar video ou audio
+                kwargs = {}
+                if media_type == 'video':
+                    kwargs['video'] = media_file
+                else:
+                    kwargs['audio'] = media_file
+                response = await chatbot.ask(prompt, **kwargs)
                 if response.get("error"):
                     content = response.get("content", "")
                     if "Failed to parse response body" in content and attempt < max_retries - 1:
                 return response
             return response
+        response_gemini = await ask_with_retry(selected_chatbot, prompt, temp_file.name, media_type)
+        # Limpar arquivo temporário
+        if os.path.exists(temp_file.name):
+            try:
+                os.unlink(temp_file.name)
+            except:
+                pass
         if response_gemini.get("error"):
              raise HTTPException(
                 status_code=500,