Spaces:

BissakaAI
/

hamid

Sleeping

App Files Files Community

BissakaAI committed on Dec 12, 2025

Commit

e13388c

verified ·

1 Parent(s): 6dea974

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -39

app.py CHANGED Viewed

@@ -1,47 +1,61 @@
-from fastapi import FastAPI
-from pydantic import BaseModel
 from pydub import AudioSegment
 import librosa
-import uvicorn
 import torch
-import soundfile as sf
 # import your existing functions
 from your_model_file import textonly, speechonly
-app = FastAPI(title="Hamid Speech API", version="1.0.0")
-@app.get("/")
-def root():
-    return {"message": "Welcome to Hamid AI Speech API"}
-class TextRequest(BaseModel):
-    text: str
-class SpeechRequest(BaseModel):
-    input_audio_path: str
-    wav_output_path: str
-@app.post("/textonly")
-def run_text(req: TextRequest):
-    result = textonly(req.text)
-    return {"response": result}
-@app.post("/speechonly")
-def run_speech(req: SpeechRequest):
-    # Convert input audio to WAV
-    audio = AudioSegment.from_file(req.input_audio_path)
-    audio = audio.set_frame_rate(16000).set_channels(1)
-    audio.export(req.wav_output_path, format="wav")
-    # Load WAV
-    speech, sr = librosa.load(req.wav_output_path, sr=16000)
-    llm_response, wav_path = speechonly(speech, output_wav_path=req.wav_output_path)
-    return {
-        "response": llm_response,
-        "wav_saved": wav_path
-    }

+import gradio as gr
 from pydub import AudioSegment
 import librosa
 import torch
+import soundfile as sf
+import numpy as np
+import os
 # import your existing functions
 from your_model_file import textonly, speechonly
+def text_interface(text):
+    """Process text input and return response"""
+    result = textonly(text)
+    return result
+def speech_interface(audio_file):
+    """Process speech input and return LLM response and audio output"""
+    if audio_file is None:
+        return "Please provide an audio file", None
+    # audio_file is a tuple of (sample_rate, audio_data) from Gradio
+    sr, audio_data = audio_file
+    # Convert to mono if needed
+    if len(audio_data.shape) > 1:
+        audio_data = np.mean(audio_data, axis=1)
+    # Resample to 16000 Hz if necessary
+    if sr != 16000:
+        audio_data = librosa.resample(audio_data, orig_sr=sr, target_sr=16000)
+    # Call the speechonly function
+    llm_response, wav_path = speechonly(audio_data, output_wav_path="output.wav")
+    return llm_response, wav_path
+# Build the Gradio UI as a Blocks layout with one tab per interaction mode.
+# Each tab wires its button's click event to the matching handler function.
+with gr.Blocks(title="Hamid AI Speech API") as app:
+    gr.Markdown("# Hamid AI Speech Interface")
+    gr.Markdown("Choose between text-only or speech-based interaction")
+    # Text tab: free-form text in, read-only response text out.
+    with gr.Tab("Text Only"):
+        text_input = gr.Textbox(label="Enter your text", placeholder="Type something...")
+        text_output = gr.Textbox(label="Response", interactive=False)
+        text_button = gr.Button("Process Text")
+        text_button.click(fn=text_interface, inputs=text_input, outputs=text_output)
+    # Speech tab: type="numpy" makes the input reach the handler as a
+    # (sample_rate, samples) tuple; the handler's two return values fill the
+    # response textbox and the output audio player (given as a file path).
+    with gr.Tab("Speech Only"):
+        audio_input = gr.Audio(label="Upload or record audio", type="numpy")
+        speech_output = gr.Textbox(label="LLM Response", interactive=False)
+        audio_output = gr.Audio(label="Output Audio", type="filepath")
+        speech_button = gr.Button("Process Speech")
+        speech_button.click(fn=speech_interface, inputs=audio_input, outputs=[speech_output, audio_output])
+# Launch the app only when this file is executed directly as a script.
+if __name__ == "__main__":
+    app.launch(share=False)