Update app.py
Browse files
app.py
CHANGED
|
@@ -677,6 +677,8 @@ class GroqRequest(BaseModel):
|
|
| 677 |
language: Optional[str] = None
|
| 678 |
temperature: Optional[float] = 0.4
|
| 679 |
has_bg_music: Optional[bool] = False # Default to False for speed/resources
|
|
|
|
|
|
|
| 680 |
|
| 681 |
def groq_json_to_srt(data):
|
| 682 |
"""Converte resposta verbose_json do Whisper/Groq para SRT"""
|
|
@@ -706,9 +708,9 @@ def groq_json_to_srt(data):
|
|
| 706 |
|
| 707 |
return srt_output
|
| 708 |
|
| 709 |
-
from srt_utils import apply_netflix_style_filter, process_audio_for_transcription
|
| 710 |
|
| 711 |
-
async def get_groq_srt_base(url: str, language: Optional[str] = None, temperature: Optional[float] = 0.4, has_bg_music: bool = False):
|
| 712 |
"""
|
| 713 |
Helper para gerar SRT base usando Groq (dando suporte a filtro Netflix).
|
| 714 |
Retorna (srt_filtered, srt_word_level, processed_audio_url)
|
|
@@ -748,7 +750,7 @@ async def get_groq_srt_base(url: str, language: Optional[str] = None, temperatur
|
|
| 748 |
# 2. Pré-processar (Remover ruído, filtrar voz, etc)
|
| 749 |
print(f"🔊 [Groq] Pré-processando áudio (has_bg_music={has_bg_music})...")
|
| 750 |
# O process_audio cria um arquivo novo com .processed.mp3
|
| 751 |
-
processed_file_path = process_audio_for_transcription(filepath, has_bg_music=has_bg_music)
|
| 752 |
|
| 753 |
# Se processou, o caminho mudou. Vamos ver.
|
| 754 |
if processed_file_path != filepath:
|
|
@@ -845,9 +847,16 @@ async def generate_subtitle_groq(request: GroqRequest):
|
|
| 845 |
url=request.url,
|
| 846 |
language=request.language,
|
| 847 |
temperature=request.temperature,
|
| 848 |
-
has_bg_music=request.has_bg_music
|
|
|
|
|
|
|
| 849 |
)
|
| 850 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 851 |
return JSONResponse(content={
|
| 852 |
"srt": srt_filtered,
|
| 853 |
"srt_word": srt_word
|
|
@@ -865,6 +874,8 @@ class GeminiSubtitleRequest(BaseModel):
|
|
| 865 |
has_bg_music: Optional[bool] = False
|
| 866 |
context: Optional[str] = "N/A"
|
| 867 |
model: Optional[str] = "flash" # 'flash' or 'thinking'
|
|
|
|
|
|
|
| 868 |
|
| 869 |
@app.post("/subtitle")
|
| 870 |
async def generate_subtitle(request: GeminiSubtitleRequest):
|
|
@@ -886,7 +897,9 @@ async def generate_subtitle(request: GeminiSubtitleRequest):
|
|
| 886 |
url=request.url,
|
| 887 |
language="en",
|
| 888 |
temperature=0.4,
|
| 889 |
-
has_bg_music=request.has_bg_music
|
|
|
|
|
|
|
| 890 |
)
|
| 891 |
|
| 892 |
# Converter URL /static/xyz.mp3 para path local
|
|
@@ -940,6 +953,17 @@ INSTRUÇÕES/CONTEXTO DO USUÁRIO: {processed_context}
|
|
| 940 |
# Limpar markdown do SRT se houver
|
| 941 |
cleaned_srt = clean_and_validate_srt(content)
|
| 942 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 943 |
return JSONResponse(content={
|
| 944 |
"srt": cleaned_srt,
|
| 945 |
"original_srt": srt_filtered,
|
|
|
|
| 677 |
language: Optional[str] = None
|
| 678 |
temperature: Optional[float] = 0.4
|
| 679 |
has_bg_music: Optional[bool] = False # Default to False for speed/resources
|
| 680 |
+
time_start: Optional[float] = None
|
| 681 |
+
time_end: Optional[float] = None
|
| 682 |
|
| 683 |
def groq_json_to_srt(data):
|
| 684 |
"""Converte resposta verbose_json do Whisper/Groq para SRT"""
|
|
|
|
| 708 |
|
| 709 |
return srt_output
|
| 710 |
|
| 711 |
+
from srt_utils import apply_netflix_style_filter, process_audio_for_transcription, shift_srt_timestamps
|
| 712 |
|
| 713 |
+
async def get_groq_srt_base(url: str, language: Optional[str] = None, temperature: Optional[float] = 0.4, has_bg_music: bool = False, time_start: float = None, time_end: float = None):
|
| 714 |
"""
|
| 715 |
Helper para gerar SRT base usando Groq (dando suporte a filtro Netflix).
|
| 716 |
Retorna (srt_filtered, srt_word_level, processed_audio_url)
|
|
|
|
| 750 |
# 2. Pré-processar (Remover ruído, filtrar voz, etc)
|
| 751 |
print(f"🔊 [Groq] Pré-processando áudio (has_bg_music={has_bg_music})...")
|
| 752 |
# O process_audio cria um arquivo novo com .processed.mp3
|
| 753 |
+
processed_file_path = process_audio_for_transcription(filepath, has_bg_music=has_bg_music, time_start=time_start, time_end=time_end)
|
| 754 |
|
| 755 |
# Se processou, o caminho mudou. Vamos ver.
|
| 756 |
if processed_file_path != filepath:
|
|
|
|
| 847 |
url=request.url,
|
| 848 |
language=request.language,
|
| 849 |
temperature=request.temperature,
|
| 850 |
+
has_bg_music=request.has_bg_music,
|
| 851 |
+
time_start=request.time_start,
|
| 852 |
+
time_end=request.time_end
|
| 853 |
)
|
| 854 |
|
| 855 |
+
# Shift timestamps if needed
|
| 856 |
+
if request.time_start and request.time_start > 0:
|
| 857 |
+
srt_filtered = shift_srt_timestamps(srt_filtered, request.time_start)
|
| 858 |
+
srt_word = shift_srt_timestamps(srt_word, request.time_start)
|
| 859 |
+
|
| 860 |
return JSONResponse(content={
|
| 861 |
"srt": srt_filtered,
|
| 862 |
"srt_word": srt_word
|
|
|
|
| 874 |
has_bg_music: Optional[bool] = False
|
| 875 |
context: Optional[str] = "N/A"
|
| 876 |
model: Optional[str] = "flash" # 'flash' or 'thinking'
|
| 877 |
+
time_start: Optional[float] = None
|
| 878 |
+
time_end: Optional[float] = None
|
| 879 |
|
| 880 |
@app.post("/subtitle")
|
| 881 |
async def generate_subtitle(request: GeminiSubtitleRequest):
|
|
|
|
| 897 |
url=request.url,
|
| 898 |
language="en",
|
| 899 |
temperature=0.4,
|
| 900 |
+
has_bg_music=request.has_bg_music,
|
| 901 |
+
time_start=request.time_start,
|
| 902 |
+
time_end=request.time_end
|
| 903 |
)
|
| 904 |
|
| 905 |
# Converter URL /static/xyz.mp3 para path local
|
|
|
|
| 953 |
# Limpar markdown do SRT se houver
|
| 954 |
cleaned_srt = clean_and_validate_srt(content)
|
| 955 |
|
| 956 |
+
# Shift final timestamps if needed
|
| 957 |
+
if request.time_start and request.time_start > 0:
|
| 958 |
+
cleaned_srt = shift_srt_timestamps(cleaned_srt, request.time_start)
|
| 959 |
+
# srt_filtered comes from get_groq_srt_base, which returns 0-based timestamps.
|
| 960 |
+
# It was not shifted before being sent to Gemini,
|
| 961 |
+
# so Gemini received 0-based timestamps
|
| 962 |
+
# and returned 0-based timestamps as well.
|
| 963 |
+
# That is why cleaned_srt is shifted by time_start above.
|
| 964 |
+
# Shift original_srt (srt_filtered) by the same offset so both returned SRTs share one timeline.
|
| 965 |
+
srt_filtered = shift_srt_timestamps(srt_filtered, request.time_start)
|
| 966 |
+
|
| 967 |
return JSONResponse(content={
|
| 968 |
"srt": cleaned_srt,
|
| 969 |
"original_srt": srt_filtered,
|