Spaces:

habulaj
/

subapi

Running

App Files Community

habulaj committed 16 days ago

Commit

b98dcb6

verified ·

1 Parent(s): f2593a4

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -155

app.py CHANGED Viewed

@@ -866,8 +866,8 @@ class GeminiSubtitleRequest(BaseModel):
     context: Optional[str] = "N/A"
     model: Optional[str] = "flash" # 'flash' or 'thinking'
-@app.post("/subtitle/gemini")
-async def generate_subtitle_gemini(request: GeminiSubtitleRequest):
     """
     Endpoint PRINCIPAL:
     1. Baixa e Processa áudio (Demucs opcional + Filtros FFmpeg)
@@ -907,7 +907,7 @@ async def generate_subtitle_gemini(request: GeminiSubtitleRequest):
         # 3. Montar Prompt
         processed_context = request.context if request.context else "N/A"
-        prompt = f\"""
 Traduza essa legenda pro português do Brasil, corrija qualquer erro de formatação, pontuação e mantenha timestamps e os textos nos seus respectivos blocos de legenda.
 Deve traduzir exatamente o texto da legenda observando o contexto, não é pra migrar, por exemplo, textos de um bloco de legenda pra outro. Deve traduzir exatamente o texto de cada bloco de legenda, manter sempre as palavras, nunca retirar.
@@ -942,155 +942,3 @@ INSTRUÇÕES/CONTEXTO DO USUÁRIO: {processed_context}
         import traceback
         traceback.print_exc()
         raise HTTPException(status_code=500, detail=str(e))
-@app.get("/subtitle")
-async def generate_subtitle(
-    file: str,
-    context: Optional[str] = None,
-    start: Optional[float] = None,
-    end: Optional[float] = None,
-    model: Optional[str] = "thinking",
-    language: Optional[str] = None,
-    temperature: Optional[float] = 0.4
-):
-    """
-    Endpoint PRINCIPAL para gerar legendas traduzidas (PT-BR).
-    Fluxo:
-    1. Gera SRT base com Groq + Filtro Netflix (via helper /subtitle/groq logic).
-    2. Envia SRT para Gemini traduzir para PT-BR mantendo formatação.
-    """
-    logs = []
-    def log(msg):
-        print(msg)
-        logs.append(msg)
-    if not chatbots:
-        raise HTTPException(status_code=500, detail="Chatbot não inicializado")
-    # Selecionar chatbot
-    requested_model = model.lower() if model else "flash"
-    if "thinking" in requested_model:
-        selected_chatbot = chatbots.get('thinking', chatbots['default'])
-    else:
-        selected_chatbot = chatbots.get('flash', chatbots['default'])
-    if not file:
-        raise HTTPException(status_code=400, detail="Parâmetro 'file' é obrigatório")
-    try:
-        # 1. Gerar SRT Base (Groq)
-        log(f"🎤 [Fase 1] Gerando SRT base com Groq...")
-        srt_base, _, processed_audio_url = await get_groq_srt_base(file, language, temperature)
-        log(f"✅ SRT Base gerado ({len(srt_base)} chars)")
-        # 2. Traduzir com Gemini
-        log(f"🧠 [Fase 2] Enviando para Gemini ({requested_model}) para tradução PT-BR (com contexto de vídeo)...")
-        # Baixar arquivo para enviar ao Gemini
-        log(f"⬇️ Baixando mídia para contexto...")
-        response = download_file_with_retry(file)
-        content_type = response.headers.get('content-type', '').lower()
-        file_extension = '.mp4'
-        media_type = 'video'
-        if 'audio' in content_type:
-            file_extension = '.mp3'
-            media_type = 'audio'
-        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=file_extension)
-        for chunk in response.iter_content(chunk_size=8192):
-            if chunk:
-                temp_file.write(chunk)
-        temp_file.close()
-        log(f"✅ Mídia baixada: {temp_file.name}")
-        context_instruction = ""
-        if context:
-            context_instruction = f"\n6.  **USER INSTRUCTIONS**: {context}\n"
-        prompt = f"""Translate and Correct the SRT subtitles to Brazilian Portuguese (PT-BR).
-SOURCE MATERIAL: I have attached the video/audio file. Use it as the PRIMARY source of truth for context, speaker identification, and meaning.
-INPUT SRT: I have provided a draft SRT generated by an automated transcription service.
-CRITICAL INSTRUCTIONS:
-1.  **CORRECTION**: The input SRT is a draft. It may have wrong words or misinterpretations. **You MUST correct the text based on the actual audio.**
-2.  **TIMESTAMPS**: The timestamps in the input SRT are generally correct. **Keep them as much as possible**, only adjust if they are clearly wrong / desynchronized.
-3.  **FORMATTING**:
-    - Keep the visual structure (2 lines max).
-    - **DIALOGUES**: If two different people speak in the same block, separate them with a hyphen "- " at the start of each line.
-        *   Example Input: "I did it. So did I."
-        *   Example Output:
-            - Eu fiz isso.
-            - Eu também.
-4.  **CONTEXT**: Translate naturally for a Brazilian audience. Prioritize clarity and meaning over literal translation. Make it readable and elegant.
-5.  **PUNCTUATION**: Adjust punctuation to fit the context best.
-6.  **OUTPUT**: Return ONLY the corrected and translated SRT content. No extra text.{context_instruction}
-DRAFT SRT:
-{srt_base}
-"""
-        # Helper function for retry logic
-        async def ask_with_retry(chatbot, prompt, media_file, media_type):
-            max_retries = 3
-            for attempt in range(max_retries):
-                # Passar video ou audio
-                kwargs = {}
-                if media_type == 'video':
-                    kwargs['video'] = media_file
-                else:
-                    kwargs['audio'] = media_file
-                response = await chatbot.ask(prompt, **kwargs)
-                if response.get("error"):
-                    content = response.get("content", "")
-                    if "Failed to parse response body" in content and attempt < max_retries - 1:
-                        log(f"⚠️ Erro de parsing (tentativa {attempt+1}/{max_retries}). Retentando...")
-                        import asyncio
-                        await asyncio.sleep(2)
-                        continue
-                return response
-            return response
-        response_gemini = await ask_with_retry(selected_chatbot, prompt, temp_file.name, media_type)
-        # Limpar arquivo temporário
-        if os.path.exists(temp_file.name):
-            try:
-                os.unlink(temp_file.name)
-            except:
-                pass
-        if response_gemini.get("error"):
-             raise HTTPException(
-                status_code=500,
-                detail=f"Erro no Gemini: {response_gemini.get('content', 'Erro desconhecido')}"
-            )
-        srt_translated = response_gemini.get("content", "").strip()
-        # Limpezas básicas no output do Gemini (caso ele devolva markdowns)
-        if "```srt" in srt_translated:
-            srt_translated = srt_translated.split("```srt")[1].split("```")[0].strip()
-        elif "```" in srt_translated:
-             srt_translated = srt_translated.split("```")[1].split("```")[0].strip()
-        log(f"✅ Tradução concluída")
-        # Retornar
-        return JSONResponse(
-            content={
-                "srt": srt_translated,
-                "success": True,
-                "processed_audio_url": processed_audio_url,
-                "logs": logs # Mantendo logs para debug se necessário
-            }
-        )
-    except HTTPException:
-        raise
-    except Exception as e:
-        import traceback
-        traceback.print_exc()
-        raise HTTPException(status_code=500, detail=f"Erro ao gerar legendas: {str(e)}")

     context: Optional[str] = "N/A"
     model: Optional[str] = "flash" # 'flash' or 'thinking'
+@app.post("/subtitle")
+async def generate_subtitle(request: GeminiSubtitleRequest):
     """
     Endpoint PRINCIPAL:
     1. Baixa e Processa áudio (Demucs opcional + Filtros FFmpeg)
         # 3. Montar Prompt
         processed_context = request.context if request.context else "N/A"
+        prompt = f"""
 Traduza essa legenda pro português do Brasil, corrija qualquer erro de formatação, pontuação e mantenha timestamps e os textos nos seus respectivos blocos de legenda.
 Deve traduzir exatamente o texto da legenda observando o contexto, não é pra migrar, por exemplo, textos de um bloco de legenda pra outro. Deve traduzir exatamente o texto de cada bloco de legenda, manter sempre as palavras, nunca retirar.
         import traceback
         traceback.print_exc()
         raise HTTPException(status_code=500, detail=str(e))