Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from transformers import AutoProcessor, AutoModelForCausalLM
|
| 3 |
from PIL import Image
|
| 4 |
import torch
|
| 5 |
from gtts import gTTS
|
|
@@ -8,6 +9,9 @@ import requests
|
|
| 8 |
import nltk.tree
|
| 9 |
import re
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
# Carregar o modelo de português do spaCy
|
| 12 |
nlp = spacy.load("pt_core_news_sm")
|
| 13 |
|
|
@@ -192,14 +196,10 @@ def reordenar_sentenca(sentenca):
|
|
| 192 |
# Carregar os modelos
|
| 193 |
processor = AutoProcessor.from_pretrained("histlearn/microsoft-git-portuguese-neuro-simbolic")
|
| 194 |
model = AutoModelForCausalLM.from_pretrained("histlearn/microsoft-git-portuguese-neuro-simbolic")
|
| 195 |
-
translation_model_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
|
| 196 |
-
translation_tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
|
| 197 |
-
translation_model = MarianMTModel.from_pretrained(translation_model_name)
|
| 198 |
|
| 199 |
# Configurar o dispositivo (GPU ou CPU)
|
| 200 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 201 |
model.to(device)
|
| 202 |
-
translation_model.to(device)
|
| 203 |
|
| 204 |
# Funções auxiliares
|
| 205 |
def prepare_image(image_path):
|
|
@@ -219,11 +219,6 @@ def generate_caption(pixel_values):
|
|
| 219 |
)
|
| 220 |
return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 221 |
|
| 222 |
-
def translate_to_portuguese(text):
|
| 223 |
-
inputs = translation_tokenizer(text, return_tensors="pt", truncation=True).to(device)
|
| 224 |
-
translated_ids = translation_model.generate(inputs["input_ids"], max_length=50, num_beams=4, early_stopping=True)
|
| 225 |
-
return translation_tokenizer.batch_decode(translated_ids, skip_special_tokens=True)[0]
|
| 226 |
-
|
| 227 |
def text_to_speech_gtts(text, lang='pt'):
|
| 228 |
tts = gTTS(text=text, lang=lang)
|
| 229 |
tts.save("output.mp3")
|
|
@@ -232,8 +227,7 @@ def text_to_speech_gtts(text, lang='pt'):
|
|
| 232 |
# Função principal para processar a imagem e gerar a voz
|
| 233 |
def process_image(image):
|
| 234 |
_, pixel_values = prepare_image(image)
|
| 235 |
-
|
| 236 |
-
caption_pt = translate_to_portuguese(caption_en)
|
| 237 |
caption_pt = reordenar_sentenca(caption_pt)
|
| 238 |
audio_file = text_to_speech_gtts(caption_pt)
|
| 239 |
return caption_pt, audio_file
|
|
|
|
| 1 |
+
import os
|
| 2 |
import gradio as gr
|
| 3 |
+
from transformers import AutoProcessor, AutoModelForCausalLM
|
| 4 |
from PIL import Image
|
| 5 |
import torch
|
| 6 |
from gtts import gTTS
|
|
|
|
| 9 |
import nltk.tree
|
| 10 |
import re
|
| 11 |
|
| 12 |
+
# Load the Portuguese spaCy model, downloading it only when it is missing.
try:
    nlp = spacy.load("pt_core_news_sm")
except OSError:
    # spacy.load raises OSError when the model package is not installed.
    # Install it with the interpreter that is running this app (not whatever
    # "python" happens to be on PATH) and pass an argument list so that no
    # shell is involved. check=True surfaces a failed download immediately.
    import importlib
    import subprocess
    import sys

    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "pt_core_news_sm"],
        check=True,
    )
    # Make the freshly installed package visible to the import system.
    importlib.invalidate_caches()
    nlp = spacy.load("pt_core_news_sm")
|
| 17 |
|
|
|
|
| 196 |
# Load the captioning processor and model (both from the same checkpoint)
processor = AutoProcessor.from_pretrained(
    "histlearn/microsoft-git-portuguese-neuro-simbolic"
)
model = AutoModelForCausalLM.from_pretrained(
    "histlearn/microsoft-git-portuguese-neuro-simbolic"
)

# Pick the device: GPU when CUDA is available, otherwise CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)
|
|
|
|
| 203 |
|
| 204 |
# Funções auxiliares
|
| 205 |
def prepare_image(image_path):
|
|
|
|
| 219 |
)
|
| 220 |
return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
def text_to_speech_gtts(text, lang='pt'):
|
| 223 |
tts = gTTS(text=text, lang=lang)
|
| 224 |
tts.save("output.mp3")
|
|
|
|
| 227 |
# Main entry point: caption an image and synthesize speech for the caption
def process_image(image):
    """Generate a caption for *image* and convert that caption to speech.

    The image is passed to ``prepare_image``; the second element of its
    result is fed to ``generate_caption``. The caption is then rewritten by
    ``reordenar_sentenca`` and handed to ``text_to_speech_gtts``.

    Args:
        image: The input forwarded unchanged to ``prepare_image``.

    Returns:
        A ``(caption, audio)`` tuple: the reordered caption text and whatever
        ``text_to_speech_gtts`` returns for it (presumably the path of the
        generated audio file -- confirm against that helper).
    """
    _unused, pixels = prepare_image(image)
    raw_caption = generate_caption(pixels)
    caption = reordenar_sentenca(raw_caption)
    speech = text_to_speech_gtts(caption)
    return caption, speech
|