walker11 committed on
Commit
76ac4df
·
verified ·
1 Parent(s): 86a9d2d

Upload 4 files

Browse files
Files changed (2) hide show
  1. app.py +14 -50
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import os
2
  import tempfile
3
  import whisper
4
- import requests
5
  import gradio as gr
6
  from fastapi import FastAPI, File, UploadFile
7
  from fastapi.responses import JSONResponse
@@ -11,7 +10,7 @@ import uvicorn
11
  from pathlib import Path
12
 
13
  # Create FastAPI app
14
- app = FastAPI(title="RAWI Voice to Story Generator")
15
 
16
  # Configure CORS to allow requests from frontend
17
  app.add_middleware(
@@ -22,10 +21,6 @@ app.add_middleware(
22
  allow_headers=["*"],
23
  )
24
 
25
- # Load environment variables or use defaults
26
- DEEPSEEK_API_URL = os.environ.get("DEEPSEEK_API_URL", "https://api.deepseek.com/v1/chat/completions")
27
- DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
28
-
29
  # Load Whisper model (can be tiny/base/small depending on hardware)
30
  model = whisper.load_model("base")
31
 
@@ -39,15 +34,14 @@ async def generate_story_api(file: UploadFile = File(...)):
39
  tmp_path = tmp.name
40
 
41
  # Process the audio using our function
42
- transcript, story = process_audio(tmp_path)
43
 
44
  # Clean up temp file
45
  os.remove(tmp_path)
46
 
47
  # Return JSON response
48
  return JSONResponse({
49
- "transcript": transcript,
50
- "story": story
51
  })
52
  except Exception as e:
53
  return JSONResponse(
@@ -56,43 +50,14 @@ async def generate_story_api(file: UploadFile = File(...)):
56
  )
57
 
58
  # Function for processing audio (used by both FastAPI and Gradio)
59
- def process_audio(audio_path):
60
  try:
61
  # Transcribe using Whisper
62
  result = model.transcribe(audio_path, language="ar")
63
  text = result.get("text", "")
64
-
65
- # Send the transcript to DeepSeek API
66
- prompt = f"هذه قصة قصيرة كتبها المستخدم بصوته، من فضلك قم بتصحيح أي أخطاء لغوية أو كلمات غير مفهومة تسبب بيها موديل تحويل الصوت الي نص: {text}"
67
-
68
- headers = {
69
- "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
70
- "Content-Type": "application/json"
71
- }
72
-
73
- payload = {
74
- "model": "deepseek-chat", # use your actual model name
75
- "messages": [
76
- {"role": "user", "content": prompt}
77
- ]
78
- }
79
-
80
- # Only make API call if key exists
81
- if DEEPSEEK_API_KEY:
82
- response = requests.post(DEEPSEEK_API_URL, json=payload, headers=headers)
83
- response_json = response.json()
84
-
85
- if response.status_code == 200 and "choices" in response_json:
86
- story = response_json["choices"][0]["message"]["content"]
87
- else:
88
- story = "حدث خطأ أثناء توليد القصة. تفاصيل: " + str(response_json)
89
- else:
90
- story = "تنبيه: لم يتم تكوين مفتاح API. الرجاء تعيين متغير البيئة DEEPSEEK_API_KEY."
91
-
92
- return text, story
93
-
94
  except Exception as e:
95
- return "", f"حدث خطأ: {str(e)}"
96
 
97
  # Gradio interface wrapper for the model
98
  def gradio_process(audio_file):
@@ -101,32 +66,31 @@ def gradio_process(audio_file):
101
  audio_path = audio_file if isinstance(audio_file, str) else audio_file.name
102
 
103
  # Process the audio
104
- transcript, story = process_audio(audio_path)
105
 
106
- return transcript, story
107
 
108
  except Exception as e:
109
- return "", f"حدث خطأ: {str(e)}"
110
 
111
  # Define Gradio interface
112
- with gr.Blocks(title="RAWI Voice to Story Generator") as demo:
113
- gr.Markdown("# RAWI Voice to Story Generator")
114
- gr.Markdown("قم بتسجيل أو تحميل ملف صوتي باللغة العربية وسيقوم النظام بتحويله إلى قصة.")
115
 
116
  with gr.Row():
117
  audio_input = gr.Audio(label="تسجيل أو تحميل صوت", type="filepath")
118
 
119
  with gr.Row():
120
- submit_btn = gr.Button("توليد القصة")
121
 
122
  with gr.Row():
123
  transcript_output = gr.Textbox(label="النص المستخرج من التسجيل الصوتي")
124
- story_output = gr.Textbox(label="القصة المولدة")
125
 
126
  submit_btn.click(
127
  fn=gradio_process,
128
  inputs=audio_input,
129
- outputs=[transcript_output, story_output],
130
  )
131
 
132
  # Mount static files for frontend if they exist
 
1
  import os
2
  import tempfile
3
  import whisper
 
4
  import gradio as gr
5
  from fastapi import FastAPI, File, UploadFile
6
  from fastapi.responses import JSONResponse
 
10
  from pathlib import Path
11
 
12
  # Create FastAPI app
13
+ app = FastAPI(title="Speech to Text Model")
14
 
15
  # Configure CORS to allow requests from frontend
16
  app.add_middleware(
 
21
  allow_headers=["*"],
22
  )
23
 
 
 
 
 
24
  # Load Whisper model (can be tiny/base/small depending on hardware)
25
  model = whisper.load_model("base")
26
 
 
34
  tmp_path = tmp.name
35
 
36
  # Process the audio using our function
37
+ transcript = transcribe_audio(tmp_path)
38
 
39
  # Clean up temp file
40
  os.remove(tmp_path)
41
 
42
  # Return JSON response
43
  return JSONResponse({
44
+ "transcript": transcript
 
45
  })
46
  except Exception as e:
47
  return JSONResponse(
 
50
  )
51
 
52
  # Function for processing audio (used by both FastAPI and Gradio)
53
+ def transcribe_audio(audio_path):
54
  try:
55
  # Transcribe using Whisper
56
  result = model.transcribe(audio_path, language="ar")
57
  text = result.get("text", "")
58
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  except Exception as e:
60
+ return f"حدث خطأ: {str(e)}"
61
 
62
  # Gradio interface wrapper for the model
63
  def gradio_process(audio_file):
 
66
  audio_path = audio_file if isinstance(audio_file, str) else audio_file.name
67
 
68
  # Process the audio
69
+ transcript = transcribe_audio(audio_path)
70
 
71
+ return transcript
72
 
73
  except Exception as e:
74
+ return f"حدث خطأ: {str(e)}"
75
 
76
  # Define Gradio interface
77
+ with gr.Blocks(title="Speech to Text Model") as demo:
78
+ gr.Markdown("# Speech to Text")
79
+ gr.Markdown("قم بتسجيل أو تحميل ملف صوتي باللغة العربية وسيقوم النظام بتحويله إلى نص.")
80
 
81
  with gr.Row():
82
  audio_input = gr.Audio(label="تسجيل أو تحميل صوت", type="filepath")
83
 
84
  with gr.Row():
85
+ submit_btn = gr.Button("تحويل إلى نص")
86
 
87
  with gr.Row():
88
  transcript_output = gr.Textbox(label="النص المستخرج من التسجيل الصوتي")
 
89
 
90
  submit_btn.click(
91
  fn=gradio_process,
92
  inputs=audio_input,
93
+ outputs=transcript_output,
94
  )
95
 
96
  # Mount static files for frontend if they exist
requirements.txt CHANGED
@@ -1,9 +1,8 @@
1
  gradio>=3.50.2
2
  openai-whisper==20231117
3
  torch==2.0.1
4
- requests==2.31.0
5
- ffmpeg-python==0.2.0
6
  fastapi==0.103.1
7
  uvicorn==0.23.2
8
  python-multipart==0.0.6
 
9
  --extra-index-url https://download.pytorch.org/whl/cpu
 
1
  gradio>=3.50.2
2
  openai-whisper==20231117
3
  torch==2.0.1
 
 
4
  fastapi==0.103.1
5
  uvicorn==0.23.2
6
  python-multipart==0.0.6
7
+ ffmpeg-python==0.2.0
8
  --extra-index-url https://download.pytorch.org/whl/cpu