Josephanthraper committed on
Commit
82d0705
·
verified ·
1 Parent(s): e3f3705

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -30
app.py CHANGED
@@ -1,48 +1,112 @@
1
- # app.py
2
- import gradio as gr
3
- from whisper_jax import FlaxWhisperPipline
4
  import jax.numpy as jnp
 
 
 
5
  from pydub import AudioSegment
6
- import os
 
 
 
7
 
8
- # Load Whisper JAX model once (on startup)
9
  asr_pipeline = FlaxWhisperPipline(
10
  "parthiv11/indic_whisper_nodcil",
11
  dtype=jnp.bfloat16
12
  )
13
 
14
- # Convert audio to wav (most stable for Whisper)
15
- def convert_to_wav(audio_file):
16
- wav_path = audio_file.rsplit(".", 1)[0] + ".wav"
17
- sound = AudioSegment.from_file(audio_file)
18
  sound.export(wav_path, format="wav")
19
  return wav_path
20
 
21
- # Function connected to Gradio
22
- def transcribe(audio_file):
23
- if audio_file is None:
24
- return "Please upload an audio file."
25
 
 
 
26
  try:
27
- wav_file = convert_to_wav(audio_file)
28
- result = asr_pipeline(wav_file)
 
 
 
 
 
29
 
30
- # Clean up temp wav file if created
31
- if wav_file != audio_file and os.path.exists(wav_file):
32
- os.remove(wav_file)
 
 
 
 
 
33
 
34
- return result["text"] if isinstance(result, dict) else result
35
  except Exception as e:
36
- return f"Error processing audio: {str(e)}"
37
-
38
- # Build UI
39
- demo = gr.Interface(
40
- fn=transcribe,
41
- inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
42
- outputs=gr.Textbox(label="Transcription", lines=10),
43
- title="Whisper (JAX)",
44
- description="Upload or record Hindi speech and get transcription"
45
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  if __name__ == "__main__":
48
- demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
 
1
# fastapi_app.py
# FastAPI service exposing Whisper JAX speech-to-text (Hindi model) over HTTP.
import os

import jax.numpy as jnp
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse, HTMLResponse
from whisper_jax import FlaxWhisperPipline
from pydub import AudioSegment
import uvicorn
import tempfile

app = FastAPI(title="Whisper JAX API", description="Transcribe Hindi speech using Whisper JAX", version="1.0")

# Load Whisper JAX model once on startup
# Module-level load so every request reuses the same compiled pipeline.
# bfloat16 is a reduced-precision dtype chosen to cut memory use for inference.
asr_pipeline = FlaxWhisperPipline(
    "parthiv11/indic_whisper_nodcil",
    dtype=jnp.bfloat16
)
18
 
19
# Convert audio to wav
def convert_to_wav(input_path: str) -> str:
    """Convert any audio file pydub can decode to a WAV file.

    Args:
        input_path: Path to the source audio file.

    Returns:
        Path to the exported WAV file (the input path with its final
        extension replaced by ``.wav``).

    Raises:
        Whatever pydub/ffmpeg raises if the input cannot be decoded.
    """
    # os.path.splitext only strips the final extension of the basename,
    # so a dot inside a directory name (e.g. /tmp/run.v2/clip) cannot
    # truncate the path — unlike the previous rsplit(".", 1) approach.
    root, _ext = os.path.splitext(input_path)
    wav_path = root + ".wav"
    sound = AudioSegment.from_file(input_path)
    sound.export(wav_path, format="wav")
    return wav_path
25
 
26
@app.get("/")
async def root():
    """Liveness endpoint: confirms the API process is serving requests."""
    status_payload = {"message": "Whisper JAX API is running!"}
    return status_payload
 
29
 
30
@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the Whisper JAX pipeline.

    The upload is spooled to a temp file (preserving its extension so
    pydub/ffmpeg can sniff the format), converted to WAV, and passed to
    ``asr_pipeline``.

    Returns:
        JSONResponse with ``{"transcription": <text>}``.

    Raises:
        HTTPException(500) with the underlying error message on failure.
    """
    temp_path = None
    wav_path = None
    try:
        suffix = os.path.splitext(file.filename)[-1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(await file.read())
            temp_path = temp_file.name

        wav_path = convert_to_wav(temp_path)
        result = asr_pipeline(wav_path)

        # Pipeline may return a dict ({"text": ...}) or a bare string.
        transcription = result["text"] if isinstance(result, dict) else result
        return JSONResponse(content={"transcription": transcription})

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing audio: {str(e)}")
    finally:
        # Cleanup in finally so temp files are removed even when
        # conversion or transcription raises — the previous version only
        # cleaned up on the success path, leaking a file per failed request.
        for path in (temp_path, wav_path):
            if path and os.path.exists(path):
                os.remove(path)
52
+
53
@app.get("/ui")
async def serve_ui():
    """Serve a minimal self-contained HTML page that uploads audio to /transcribe.

    The page posts the selected file as multipart form data (field name
    ``file``, matching the /transcribe parameter) and renders the returned
    ``transcription`` field into the output box.
    """
    # NOTE(review): original indentation of this HTML was lost in the diff
    # rendering; content below is whitespace-insensitive HTML/JS.
    html_content = """
    <!DOCTYPE html>
    <html>
    <head>
        <title>Whisper JAX Transcription</title>
        <style>
            body { font-family: Arial, sans-serif; max-width: 600px; margin: 30px auto; padding: 20px; background: #f4f6f8; }
            h2 { text-align: center; }
            .card { background: white; padding: 20px; border-radius: 12px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); }
            #output { margin-top: 20px; padding: 15px; background: #fafafa; border: 1px solid #ddd; border-radius: 8px; min-height: 100px; }
            button { padding: 10px 20px; border: none; border-radius: 8px; background: #06b6d4; color: white; cursor: pointer; }
            button:hover { background: #0891b2; }
        </style>
    </head>
    <body>
        <div class="card">
            <h2>Whisper (JAX) Speech-to-Text</h2>
            <form id="uploadForm">
                <input type="file" id="audioFile" name="file" accept="audio/*" required />
                <button type="submit">Transcribe</button>
            </form>
            <div id="output">Transcription will appear here...</div>
        </div>

        <script>
            document.getElementById("uploadForm").addEventListener("submit", async function(e) {
                e.preventDefault();
                const fileInput = document.getElementById("audioFile");
                if (!fileInput.files.length) return;

                const formData = new FormData();
                formData.append("file", fileInput.files[0]);

                document.getElementById("output").innerText = "Processing...";

                try {
                    const response = await fetch("/transcribe", {
                        method: "POST",
                        body: formData
                    });
                    const data = await response.json();
                    if (data.transcription) {
                        document.getElementById("output").innerText = data.transcription;
                    } else {
                        document.getElementById("output").innerText = "Error: " + JSON.stringify(data);
                    }
                } catch (err) {
                    document.getElementById("output").innerText = "Failed: " + err.message;
                }
            });
        </script>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)
110
+
111
if __name__ == "__main__":
    # Pass the app object rather than the import string "fastapi_app:app":
    # this commit saves the file as app.py (see the commit header), so the
    # hard-coded module name would raise ModuleNotFoundError at startup.
    # reload=True requires an import string and is a dev-only feature, so
    # it is dropped for this deployment entry point.
    uvicorn.run(app, host="0.0.0.0", port=7860)