walker11 commited on
Commit
86a9d2d
·
verified ·
1 Parent(s): 9133686

Upload 4 files

Browse files
Files changed (2) hide show
  1. app.py +139 -79
  2. requirements.txt +3 -0
app.py CHANGED
@@ -1,79 +1,139 @@
1
- import os
2
- import tempfile
3
- import whisper
4
- import requests
5
- import gradio as gr
6
-
7
- # Load environment variables or use defaults
8
- DEEPSEEK_API_URL = os.environ.get("DEEPSEEK_API_URL", "https://api.deepseek.com/v1/chat/completions")
9
- DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
10
-
11
- # Load Whisper model (can be tiny/base/small depending on hardware)
12
- model = whisper.load_model("base")
13
-
14
def generate_story(audio_file):
    """Transcribe an Arabic recording with Whisper and polish it via the DeepSeek API.

    Parameters:
        audio_file: either a filesystem path (str) or a file-like object with a
            ``.name`` attribute, as Gradio may supply either.

    Returns:
        (transcript, story): the raw Whisper transcript and the corrected text.
        On any failure this returns ("", error_message) instead of raising.
    """
    try:
        # Handle the audio file whether it's a string path or an object.
        audio_path = audio_file if isinstance(audio_file, str) else audio_file.name

        # Transcribe using Whisper (forced Arabic).
        result = model.transcribe(audio_path, language="ar")
        text = result.get("text", "")

        # Prompt asking DeepSeek to fix speech-to-text mistakes in the story.
        prompt = f" هذه قصة قصيرة كتبها المستخدم بصوته، من فضلك قم بتصحيح أي أخطاء لغوية أو كلمات غير مفهومة تسبب بيها موديل تحويل الصوت الي نص: {text}"

        headers = {
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": "deepseek-chat",  # use your actual model name
            "messages": [
                {"role": "user", "content": prompt}
            ]
        }

        # Only make the API call if a key is configured.
        if DEEPSEEK_API_KEY:
            # Fix: explicit timeout — without one a stalled API call hangs the
            # request (and the UI) forever.
            response = requests.post(DEEPSEEK_API_URL, json=payload, headers=headers, timeout=60)
            # Fix: guard against non-JSON error bodies (e.g. HTML gateway pages),
            # which previously raised and discarded the transcript as well.
            try:
                response_json = response.json()
            except ValueError:
                response_json = {"error": response.text}

            if response.status_code == 200 and "choices" in response_json:
                story = response_json["choices"][0]["message"]["content"]
            else:
                story = "حدث خطأ أثناء توليد القصة. تفاصيل: " + str(response_json)
        else:
            story = "تنبيه: لم يتم تكوين مفتاح API. الرجاء تعيين متغير البيئة DEEPSEEK_API_KEY."

        # Return both original transcript and generated story.
        return text, story

    except Exception as e:
        return "", f"حدث خطأ: {str(e)}"
55
-
56
# Build the Gradio interface: one audio input, one button, two text outputs.
with gr.Blocks(title="RAWI Voice to Story Generator") as demo:
    gr.Markdown("# RAWI Voice to Story Generator")
    gr.Markdown("قم بتسجيل أو تحميل ملف صوتي باللغة العربية وسيقوم النظام بتحويله إلى قصة.")

    with gr.Row():
        voice_input = gr.Audio(label="تسجيل أو تحميل صوت", type="filepath")

    with gr.Row():
        generate_button = gr.Button("توليد القصة")

    with gr.Row():
        transcript_box = gr.Textbox(label="النص المستخرج من التسجيل الصوتي")
        story_box = gr.Textbox(label="القصة المولدة")

    # Wire the button to the transcription + story pipeline.
    generate_button.click(
        fn=generate_story,
        inputs=voice_input,
        outputs=[transcript_box, story_box],
    )

# Start the app when executed as a script.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import whisper
4
+ import requests
5
+ import gradio as gr
6
+ from fastapi import FastAPI, File, UploadFile
7
+ from fastapi.responses import JSONResponse
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from fastapi.staticfiles import StaticFiles
10
+ import uvicorn
11
+ from pathlib import Path
12
+
13
# FastAPI application that hosts both the JSON API and (optionally) the frontend.
app = FastAPI(title="RAWI Voice to Story Generator")

# Allow cross-origin requests from the frontend.
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers for credentialed requests — tighten in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # This can be more restrictive in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# DeepSeek endpoint and key come from the environment (key defaults to empty).
DEEPSEEK_API_URL = os.environ.get("DEEPSEEK_API_URL", "https://api.deepseek.com/v1/chat/completions")
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")

# Whisper model loaded once at import time (tiny/base/small depending on hardware).
model = whisper.load_model("base")
31
+
32
# FastAPI endpoint for direct API access
@app.post("/generate-story")
async def generate_story_api(file: UploadFile = File(...)):
    """Accept an uploaded audio file and return its transcript and story.

    Returns:
        200 with {"transcript": ..., "story": ...} on success,
        500 with {"error": ...} on failure.
    """
    tmp_path = None
    try:
        # Persist the upload to disk because Whisper transcribes from a file path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(await file.read())
            tmp_path = tmp.name

        # Process the audio using the shared pipeline.
        transcript, story = process_audio(tmp_path)

        return JSONResponse({
            "transcript": transcript,
            "story": story
        })
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={"error": str(e)}
        )
    finally:
        # Fix: previously the temp file was removed only on the success path and
        # leaked whenever process_audio raised; always clean it up here.
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
57
+
58
# Function for processing audio (used by both FastAPI and Gradio)
def process_audio(audio_path):
    """Transcribe the file at ``audio_path`` with Whisper, then ask DeepSeek
    to correct transcription artifacts in the resulting Arabic text.

    Parameters:
        audio_path: filesystem path to an audio file readable by Whisper.

    Returns:
        (transcript, story); on any failure returns ("", error_message) —
        this function never raises.
    """
    try:
        # Transcribe using Whisper (forced Arabic).
        result = model.transcribe(audio_path, language="ar")
        text = result.get("text", "")

        # Prompt asking DeepSeek to fix speech-to-text mistakes in the story.
        prompt = f"هذه قصة قصيرة كتبها المستخدم بصوته، من فضلك قم بتصحيح أي أخطاء لغوية أو كلمات غير مفهومة تسبب بيها موديل تحويل الصوت الي نص: {text}"

        headers = {
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": "deepseek-chat",  # use your actual model name
            "messages": [
                {"role": "user", "content": prompt}
            ]
        }

        # Only make the API call if a key is configured.
        if DEEPSEEK_API_KEY:
            # Fix: explicit timeout — without one a stalled DeepSeek call hangs
            # the HTTP request / UI forever.
            response = requests.post(DEEPSEEK_API_URL, json=payload, headers=headers, timeout=60)
            # Fix: non-JSON error bodies (e.g. HTML gateway pages) previously
            # raised here and discarded the transcript too; degrade gracefully.
            try:
                response_json = response.json()
            except ValueError:
                response_json = {"error": response.text}

            if response.status_code == 200 and "choices" in response_json:
                story = response_json["choices"][0]["message"]["content"]
            else:
                story = "حدث خطأ أثناء توليد القصة. تفاصيل: " + str(response_json)
        else:
            story = "تنبيه: لم يتم تكوين مفتاح API. الرجاء تعيين متغير البيئة DEEPSEEK_API_KEY."

        return text, story

    except Exception as e:
        return "", f"حدث خطأ: {str(e)}"
96
+
97
# Thin adapter between the Gradio widget value and the shared audio pipeline.
def gradio_process(audio_file):
    """Return (transcript, story) for the audio supplied by the Gradio widget.

    Accepts either a plain path string or an object exposing ``.name``;
    on failure returns ("", error_message).
    """
    try:
        # Normalize the widget value to a filesystem path.
        if isinstance(audio_file, str):
            path = audio_file
        else:
            path = audio_file.name

        return process_audio(path)

    except Exception as e:
        return "", f"حدث خطأ: {str(e)}"
110
+
111
# Build the Gradio interface: one audio input, one button, two text outputs.
with gr.Blocks(title="RAWI Voice to Story Generator") as demo:
    gr.Markdown("# RAWI Voice to Story Generator")
    gr.Markdown("قم بتسجيل أو تحميل ملف صوتي باللغة العربية وسيقوم النظام بتحويله إلى قصة.")

    with gr.Row():
        voice_input = gr.Audio(label="تسجيل أو تحميل صوت", type="filepath")

    with gr.Row():
        generate_button = gr.Button("توليد القصة")

    with gr.Row():
        transcript_box = gr.Textbox(label="النص المستخرج من التسجيل الصوتي")
        story_box = gr.Textbox(label="القصة المولدة")

    # Wire the button to the adapter around the shared audio pipeline.
    generate_button.click(
        fn=gradio_process,
        inputs=voice_input,
        outputs=[transcript_box, story_box],
    )
131
+
132
# Serve the bundled frontend from the root path when it is present on disk.
# NOTE(review): the Gradio `demo` defined above is never launched nor mounted
# when running under uvicorn, so only the JSON API and static frontend are
# reachable — confirm this is intentional.
front_dir = Path("../front")
if front_dir.exists():
    app.mount("/", StaticFiles(directory=str(front_dir), html=True), name="frontend")

# Run the FastAPI app with uvicorn when executed as a script.
if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)
requirements.txt CHANGED
@@ -3,4 +3,7 @@ openai-whisper==20231117
3
  torch==2.0.1
4
  requests==2.31.0
5
  ffmpeg-python==0.2.0
 
 
 
6
  --extra-index-url https://download.pytorch.org/whl/cpu
 
3
  torch==2.0.1
4
  requests==2.31.0
5
  ffmpeg-python==0.2.0
6
+ fastapi==0.103.1
7
+ uvicorn==0.23.2
8
+ python-multipart==0.0.6
9
  --extra-index-url https://download.pytorch.org/whl/cpu