BissakaAI committed on
Commit
e13388c
·
verified ·
1 Parent(s): 6dea974

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -39
app.py CHANGED
@@ -1,47 +1,61 @@
1
- from fastapi import FastAPI
2
- from pydantic import BaseModel
3
  from pydub import AudioSegment
4
  import librosa
5
- import uvicorn
6
  import torch
7
- import soundfile as sf
 
 
8
 
9
  # import your existing functions
10
  from your_model_file import textonly, speechonly
11
 
12
- app = FastAPI(title="Hamid Speech API", version="1.0.0")
13
 
14
- @app.get("/")
15
- def root():
16
- return {"message": "Welcome to Hamid AI Speech API"}
17
-
18
- class TextRequest(BaseModel):
19
- text: str
20
-
21
- class SpeechRequest(BaseModel):
22
- input_audio_path: str
23
- wav_output_path: str
24
-
25
-
26
- @app.post("/textonly")
27
- def run_text(req: TextRequest):
28
- result = textonly(req.text)
29
- return {"response": result}
30
-
31
-
32
- @app.post("/speechonly")
33
- def run_speech(req: SpeechRequest):
34
- # Convert input audio to WAV
35
- audio = AudioSegment.from_file(req.input_audio_path)
36
- audio = audio.set_frame_rate(16000).set_channels(1)
37
- audio.export(req.wav_output_path, format="wav")
38
-
39
- # Load WAV
40
- speech, sr = librosa.load(req.wav_output_path, sr=16000)
41
-
42
- llm_response, wav_path = speechonly(speech, output_wav_path=req.wav_output_path)
43
-
44
- return {
45
- "response": llm_response,
46
- "wav_saved": wav_path
47
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
 
2
  from pydub import AudioSegment
3
  import librosa
 
4
  import torch
5
+ import soundfile as sf
6
+ import numpy as np
7
+ import os
8
 
9
  # import your existing functions
10
  from your_model_file import textonly, speechonly
11
 
 
12
 
13
def text_interface(text):
    """Forward the user's text to the text-only model and return its reply."""
    return textonly(text)
17
+
18
+
19
def speech_interface(audio_file):
    """Process speech input and return (LLM text response, output wav path).

    Parameters
    ----------
    audio_file : tuple[int, np.ndarray] | None
        Gradio's ``type="numpy"`` Audio component supplies
        ``(sample_rate, data)`` where ``data`` is typically int16 PCM,
        mono ``(samples,)`` or stereo ``(samples, channels)``.

    Returns
    -------
    tuple[str, str | None]
        The LLM response text and the path of the generated wav file
        (``None`` when no usable audio was provided).
    """
    if audio_file is None:
        return "Please provide an audio file", None

    sr, audio_data = audio_file

    # Guard against a zero-length recording before any DSP work.
    if audio_data.size == 0:
        return "Please provide an audio file", None

    # Gradio hands back integer PCM (usually int16). librosa.resample
    # requires float input, and downstream processing should see a
    # consistent dtype on both the resampled and non-resampled paths,
    # so normalize to float32 in [-1, 1] up front.
    if np.issubdtype(audio_data.dtype, np.integer):
        audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max
    else:
        audio_data = audio_data.astype(np.float32)

    # Down-mix stereo to mono (Gradio stereo arrays are (samples, channels)).
    if audio_data.ndim > 1:
        audio_data = np.mean(audio_data, axis=1)

    # The model expects 16 kHz input — resample only when necessary.
    if sr != 16000:
        audio_data = librosa.resample(audio_data, orig_sr=sr, target_sr=16000)

    llm_response, wav_path = speechonly(audio_data, output_wav_path="output.wav")
    return llm_response, wav_path
39
+
40
+
41
# Assemble the tabbed Gradio UI: one tab for text chat, one for speech.
with gr.Blocks(title="Hamid AI Speech API") as app:
    gr.Markdown("# Hamid AI Speech Interface")
    gr.Markdown("Choose between text-only or speech-based interaction")

    with gr.Tab("Text Only"):
        # Text in -> model reply out, wired through text_interface.
        txt_in = gr.Textbox(label="Enter your text", placeholder="Type something...")
        txt_out = gr.Textbox(label="Response", interactive=False)
        txt_btn = gr.Button("Process Text")
        txt_btn.click(fn=text_interface, inputs=txt_in, outputs=txt_out)

    with gr.Tab("Speech Only"):
        # Audio in (numpy tuple) -> text reply plus synthesized wav file.
        mic_in = gr.Audio(label="Upload or record audio", type="numpy")
        reply_out = gr.Textbox(label="LLM Response", interactive=False)
        wav_out = gr.Audio(label="Output Audio", type="filepath")
        speech_btn = gr.Button("Process Speech")
        speech_btn.click(fn=speech_interface, inputs=mic_in, outputs=[reply_out, wav_out])
58
+
59
+
60
# Script entry point: start the Gradio server locally (share=False means
# no public share link is created).
if __name__ == "__main__":
    app.launch(share=False)