mrnoisette committed on
Commit
6124b60
·
verified ·
1 Parent(s): 79a2e3f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -5,7 +5,7 @@ import io
5
  from PIL import Image
6
  from dotenv import load_dotenv
7
  from pydub import AudioSegment
8
- from huggingface_hub import InferenceApi
9
  import json
10
 
11
  load_dotenv()
@@ -15,13 +15,13 @@ FLUX1_APIKEY = os.getenv('FLUX1_APIKEY')
15
  # LLM
16
  GEMINI_APIKEY = os.getenv('GEMINI_APIKEY')
17
  # Modèle Whisper pour la transcription audio
18
- WHISPER_MODEL = InferenceApi(repo_id="openai/whisper-large-v3")
19
 
20
  def speech_to_text(audio):
21
  audio_data = AudioSegment.from_file(audio)
22
  audio_bytes = io.BytesIO()
23
  audio_data.export(audio_bytes, format="wav")
24
- response = WHISPER_MODEL({"inputs": audio_bytes.getvalue()})
25
  return response['text']
26
 
27
  def GenerateTextLLM(inputText):
@@ -34,12 +34,15 @@ def GenerateTextLLM(inputText):
34
  "model": "gemini-1.5-flash-latest"
35
  }
36
 
37
- response = requests.post(url, headers=headers, json=data)
38
-
39
  try:
40
- return json.loads(response.text)['candidates'][0]['content']['parts'][0]['text']
41
- except:
42
- return 'Error in LLM processing'
 
 
 
 
 
43
 
44
  def GenerateImageFromText(prompt):
45
  API_URL = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev"
@@ -74,13 +77,13 @@ def Main(text_input, audio_input):
74
 
75
  # Interface Gradio
76
  inputs = [
77
- gr.inputs.Textbox(label="Texte (laisser vide si audio fourni)", lines=5, placeholder="Entrez votre texte ici..."),
78
- gr.inputs.Audio(source="upload", type="file", label="Fichier audio (laisser vide si texte fourni)")
79
  ]
80
 
81
  outputs = [
82
- gr.outputs.Gallery(label="Diapositives générées"),
83
- gr.outputs.Textbox(label="Résumé en axes")
84
  ]
85
 
86
  interface = gr.Interface(
@@ -93,3 +96,4 @@ interface = gr.Interface(
93
 
94
  if __name__ == "__main__":
95
  interface.launch()
 
 
5
  from PIL import Image
6
  from dotenv import load_dotenv
7
  from pydub import AudioSegment
8
+ from huggingface_hub import InferenceClient
9
  import json
10
 
11
  load_dotenv()
 
15
  # LLM
16
  GEMINI_APIKEY = os.getenv('GEMINI_APIKEY')
17
  # Modèle Whisper pour la transcription audio
18
+ WHISPER_MODEL = InferenceClient(repo_id="openai/whisper-large-v3")
19
 
20
  def speech_to_text(audio):
21
  audio_data = AudioSegment.from_file(audio)
22
  audio_bytes = io.BytesIO()
23
  audio_data.export(audio_bytes, format="wav")
24
+ response = WHISPER_MODEL.call({"inputs": audio_bytes.getvalue()})
25
  return response['text']
26
 
27
  def GenerateTextLLM(inputText):
 
34
  "model": "gemini-1.5-flash-latest"
35
  }
36
 
 
 
37
  try:
38
+ response = requests.post(url, headers=headers, json=data)
39
+ response.raise_for_status() # Va lancer une exception si la réponse a un statut d'erreur
40
+ result = json.loads(response.text)
41
+ return result['candidates'][0]['content']['parts'][0]['text']
42
+ except requests.exceptions.RequestException as e:
43
+ return f'Error in LLM processing: {e}'
44
+ except (KeyError, json.JSONDecodeError) as e:
45
+ return f'Error parsing response: {e}'
46
 
47
  def GenerateImageFromText(prompt):
48
  API_URL = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev"
 
77
 
78
  # Interface Gradio
79
  inputs = [
80
+ gr.Textbox(label="Texte (laisser vide si audio fourni)", lines=5, placeholder="Entrez votre texte ici..."),
81
+ gr.Audio(source="upload", type="file", label="Fichier audio (laisser vide si texte fourni)")
82
  ]
83
 
84
  outputs = [
85
+ gr.Gallery(label="Diapositives générées"),
86
+ gr.Textbox(label="Résumé en axes")
87
  ]
88
 
89
  interface = gr.Interface(
 
96
 
97
  if __name__ == "__main__":
98
  interface.launch()
99
+