SandraCLV committed on
Commit
13e6a73
1 Parent(s): 36063e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -13
app.py CHANGED
@@ -2,7 +2,6 @@ import gradio as gr
2
  from transformers import pipeline
3
  from PIL import Image, ImageOps
4
  import torch
5
- import timm
6
 
7
 
8
  #Definir 2 modelos uno de imagen a texto y otro de texto a audio que inyecta
@@ -19,25 +18,17 @@ text_to_audio_model = pipeline("text-to-speech")
19
  # Función para la interfaz de Gradio
20
  def image_to_audio(input_image):
21
  # Convertir la imagen a texto
22
- model = timm.create_model("hf_hub:timm/mobilenetv3_large_100.ra_in1k", pretrained=True)
23
- model = model.eval()
24
  # get model specific transforms (normalization, resize)
25
- data_config = timm.data.resolve_model_data_config(model)
26
- transforms = timm.data.create_transform(**data_config, is_training=False)
27
- output = model(transforms(img).unsqueeze(0)) # unsqueeze single image into batch of 1
28
- for o in output:
29
-
30
- #text_output = image_to_text_model(input_image)[0]['label']
31
- # Generar audio a partir del texto
32
- audio_output = text_to_audio_model(o.shape)['audio']
33
- print(o.shape)
34
 
35
 
36
 
37
  # Interfaz Gradio
38
  iface = gr.Interface(
39
  fn=image_to_audio,
40
- inputs= gr.Image(type="pil"),
41
  outputs="audio",
42
  live=True,
43
  interpretation="default",
 
2
  from transformers import pipeline
3
  from PIL import Image, ImageOps
4
  import torch
 
5
 
6
 
7
  #Definir 2 modelos uno de imagen a texto y otro de texto a audio que inyecta
 
18
  # Función para la interfaz de Gradio
19
  def image_to_audio(input_image):
20
  # Convertir la imagen a texto
21
+ model=gr.Interface.load("models/timm/mobilenetv3_large_100.ra_in1k").launch()
22
+ text_output = image_to_text_model(model)[0]['label']
23
  # get model specific transforms (normalization, resize)
24
+ audio_output = text_to_audio_model(model)['audio']
 
 
 
 
 
 
 
 
25
 
26
 
27
 
28
  # Interfaz Gradio
29
  iface = gr.Interface(
30
  fn=image_to_audio,
31
+ inputs= gr.Image(type='PIL'),
32
  outputs="audio",
33
  live=True,
34
  interpretation="default",