DGutierrez81 committed on
Commit
b5f36c2
·
verified ·
1 Parent(s): aa43f8a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -26
app.py CHANGED
@@ -1,19 +1,19 @@
1
  import gradio as gr
2
  import requests
3
  from PIL import Image
4
- from io import BytesIO
5
  from transformers import pipeline
6
- from datasets import load_dataset
7
  import torch
8
  import soundfile as sf
9
 
10
-
11
  image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
12
  synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
13
 
14
- embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
15
- speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
16
 
 
17
  url = "https://www.thecocktaildb.com/api/json/v1/1/search.php?s=margarita"
18
  response = requests.get(url)
19
  lista = []
@@ -26,45 +26,52 @@ if response.status_code == 200:
26
  else:
27
  print(f"Error: {response.status_code}")
28
 
 
29
  def change_textbox(choice):
30
  cocktail = requests.get(f"https://www.thecocktaildb.com/api/json/v1/1/search.php?s={choice}")
31
  data = cocktail.json()
32
  dataCocktail = data.get("drinks", [])
33
-
 
 
 
 
34
  for i in dataCocktail:
35
  if i['strDrink'].lower() == choice.lower():
36
  name = i['strDrink']
37
  instructions = i['strInstructions']
38
  image_url = i['strDrinkThumb']
39
  break
40
-
41
- textInstructions = gr.Textbox(instructions)
42
-
43
  img_response = requests.get(image_url)
44
  image = Image.open(BytesIO(img_response.content)).convert("RGB")
45
-
 
46
  result = image_to_text(image)
47
  descripcion = result[0]['generated_text']
48
-
 
49
  speech = synthesiser(instructions, forward_params={"speaker_embeddings": speaker_embedding})
50
  sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
51
  audio_path = "speech.wav"
52
-
 
53
  speech2 = synthesiser(descripcion, forward_params={"speaker_embeddings": speaker_embedding})
54
  sf.write("speech2.wav", speech2["audio"], samplerate=speech2["sampling_rate"])
55
  audio_path2 = "speech2.wav"
56
-
57
- return name, image,textInstructions,audio_path,descripcion,audio_path2
58
 
 
 
 
59
  with gr.Blocks() as demo:
60
  gr.HTML(
61
  """
62
  <style>
63
- /* Cambiar el fondo de toda la página */
64
  body {
65
- background-color: #000000;
66
- color: #FFFFFF;
67
- font-family: Arial, sans-serif;
68
  margin: 0;
69
  padding: 0;
70
  text-align: center;
@@ -79,7 +86,7 @@ with gr.Blocks() as demo:
79
  align-items: center;
80
  justify-content: center;
81
  }
82
-
83
  .gradio-container .gradio-radio {
84
  display: inline-block;
85
  margin: 10px;
@@ -93,18 +100,20 @@ with gr.Blocks() as demo:
93
  """
94
  )
95
 
96
- gr.Markdown(
97
- """<h1>Cocktails Descriptions</h1>"""
98
- )
99
 
100
  radio = gr.Radio(lista, label="Choose your cocktail:")
101
- text = gr.Textbox(lines=2, interactive=False, show_copy_button=True, label="Cocktail Name")
102
  imagen = gr.Image(label="Cocktail Image")
103
- text2 = gr.Textbox(lines=2, interactive=False, show_copy_button=True, label="Instructions")
104
  audio = gr.Audio(label="Cocktail Instructions Audio")
105
  text3 = gr.Textbox(lines=2, interactive=False, show_copy_button=True, label="Image description")
106
  audio2 = gr.Audio(label="Audio image description")
107
 
108
- radio.change(fn=change_textbox, inputs=radio, outputs=[text, imagen,text2, audio,text3, audio2])
 
 
 
 
109
 
110
- demo.launch()
 
1
  import gradio as gr
2
  import requests
3
  from PIL import Image
4
+ from io import BytesIO
5
  from transformers import pipeline
 
6
  import torch
7
  import soundfile as sf
8
 
9
+ # Pipelines de Transformers
10
  image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
11
  synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
12
 
13
+ # Speaker embedding aleatorio (para demo)
14
+ speaker_embedding = torch.randn(1, 512)
15
 
16
+ # Obtener lista de cócteles desde la API
17
  url = "https://www.thecocktaildb.com/api/json/v1/1/search.php?s=margarita"
18
  response = requests.get(url)
19
  lista = []
 
26
  else:
27
  print(f"Error: {response.status_code}")
28
 
29
+ # Función principal que actualiza la interfaz
30
  def change_textbox(choice):
31
  cocktail = requests.get(f"https://www.thecocktaildb.com/api/json/v1/1/search.php?s={choice}")
32
  data = cocktail.json()
33
  dataCocktail = data.get("drinks", [])
34
+
35
+ name = ""
36
+ instructions = ""
37
+ image_url = ""
38
+
39
  for i in dataCocktail:
40
  if i['strDrink'].lower() == choice.lower():
41
  name = i['strDrink']
42
  instructions = i['strInstructions']
43
  image_url = i['strDrinkThumb']
44
  break
45
+
46
+ # Cargar imagen
 
47
  img_response = requests.get(image_url)
48
  image = Image.open(BytesIO(img_response.content)).convert("RGB")
49
+
50
+ # Generar descripción de la imagen
51
  result = image_to_text(image)
52
  descripcion = result[0]['generated_text']
53
+
54
+ # Generar audio de instrucciones
55
  speech = synthesiser(instructions, forward_params={"speaker_embeddings": speaker_embedding})
56
  sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
57
  audio_path = "speech.wav"
58
+
59
+ # Generar audio de descripción de la imagen
60
  speech2 = synthesiser(descripcion, forward_params={"speaker_embeddings": speaker_embedding})
61
  sf.write("speech2.wav", speech2["audio"], samplerate=speech2["sampling_rate"])
62
  audio_path2 = "speech2.wav"
 
 
63
 
64
+ return name, image, instructions, audio_path, descripcion, audio_path2
65
+
66
+ # Crear la interfaz de Gradio
67
  with gr.Blocks() as demo:
68
  gr.HTML(
69
  """
70
  <style>
 
71
  body {
72
+ background-color: #000000;
73
+ color: #ffffff;
74
+ font-family: Arial, sans-serif;
75
  margin: 0;
76
  padding: 0;
77
  text-align: center;
 
86
  align-items: center;
87
  justify-content: center;
88
  }
89
+
90
  .gradio-container .gradio-radio {
91
  display: inline-block;
92
  margin: 10px;
 
100
  """
101
  )
102
 
103
+ gr.Markdown("<h1>Cocktails Descriptions</h1>")
 
 
104
 
105
  radio = gr.Radio(lista, label="Choose your cocktail:")
106
+ text = gr.Textbox(lines=1, interactive=False, show_copy_button=True, label="Cocktail Name")
107
  imagen = gr.Image(label="Cocktail Image")
108
+ text2 = gr.Textbox(lines=4, interactive=False, show_copy_button=True, label="Instructions")
109
  audio = gr.Audio(label="Cocktail Instructions Audio")
110
  text3 = gr.Textbox(lines=2, interactive=False, show_copy_button=True, label="Image description")
111
  audio2 = gr.Audio(label="Audio image description")
112
 
113
+ radio.change(
114
+ fn=change_textbox,
115
+ inputs=radio,
116
+ outputs=[text, imagen, text2, audio, text3, audio2]
117
+ )
118
 
119
+ demo.launch(share=True)