walker11 commited on
Commit
86a9d2d
·
verified ·
1 Parent(s): 9133686

Upload 4 files

Browse files
Files changed (2) hide show
  1. app.py +139 -79
  2. requirements.txt +3 -0
app.py CHANGED
@@ -1,79 +1,139 @@
1
- import os
2
- import tempfile
3
- import whisper
4
- import requests
5
- import gradio as gr
6
-
7
- # Load environment variables or use defaults
8
- DEEPSEEK_API_URL = os.environ.get("DEEPSEEK_API_URL", "https://api.deepseek.com/v1/chat/completions")
9
- DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
10
-
11
- # Load Whisper model (can be tiny/base/small depending on hardware)
12
- model = whisper.load_model("base")
13
-
14
def generate_story(audio_file):
    """Transcribe an Arabic recording with Whisper and polish it via the DeepSeek API.

    Parameters:
        audio_file: either a filesystem path (str) or a file-like object with a
            ``.name`` attribute, as Gradio may supply either.

    Returns:
        (transcript, story): the raw Whisper transcript and the corrected text.
        On any failure this returns ("", error_message) instead of raising.
    """
    try:
        # Handle the audio file whether it's a string path or an object.
        audio_path = audio_file if isinstance(audio_file, str) else audio_file.name

        # Transcribe using Whisper (forced Arabic).
        result = model.transcribe(audio_path, language="ar")
        text = result.get("text", "")

        # Prompt asking DeepSeek to fix speech-to-text mistakes in the story.
        prompt = f" هذه قصة قصيرة كتبها المستخدم بصوته، من فضلك قم بتصحيح أي أخطاء لغوية أو كلمات غير مفهومة تسبب بيها موديل تحويل الصوت الي نص: {text}"

        headers = {
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": "deepseek-chat",  # use your actual model name
            "messages": [
                {"role": "user", "content": prompt}
            ]
        }

        # Only make the API call if a key is configured.
        if DEEPSEEK_API_KEY:
            # Fix: explicit timeout — without one a stalled API call hangs the
            # request (and the UI) forever.
            response = requests.post(DEEPSEEK_API_URL, json=payload, headers=headers, timeout=60)
            # Fix: guard against non-JSON error bodies (e.g. HTML gateway pages),
            # which previously raised and discarded the transcript as well.
            try:
                response_json = response.json()
            except ValueError:
                response_json = {"error": response.text}

            if response.status_code == 200 and "choices" in response_json:
                story = response_json["choices"][0]["message"]["content"]
            else:
                story = "حدث خطأ أثناء توليد القصة. تفاصيل: " + str(response_json)
        else:
            story = "تنبيه: لم يتم تكوين مفتاح API. الرجاء تعيين متغير البيئة DEEPSEEK_API_KEY."

        # Return both original transcript and generated story.
        return text, story

    except Exception as e:
        return "", f"حدث خطأ: {str(e)}"
55
-
56
# Build the Gradio interface: one audio input, one button, two text outputs.
with gr.Blocks(title="RAWI Voice to Story Generator") as demo:
    gr.Markdown("# RAWI Voice to Story Generator")
    gr.Markdown("قم بتسجيل أو تحميل ملف صوتي باللغة العربية وسيقوم النظام بتحويله إلى قصة.")

    with gr.Row():
        voice_input = gr.Audio(label="تسجيل أو تحميل صوت", type="filepath")

    with gr.Row():
        generate_button = gr.Button("توليد القصة")

    with gr.Row():
        transcript_box = gr.Textbox(label="النص المستخرج من التسجيل الصوتي")
        story_box = gr.Textbox(label="القصة المولدة")

    # Wire the button to the transcription + story pipeline.
    generate_button.click(
        fn=generate_story,
        inputs=voice_input,
        outputs=[transcript_box, story_box],
    )

# Start the app when executed as a script.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import whisper
4
+ import requests
5
+ import gradio as gr
6
+ from fastapi import FastAPI, File, UploadFile
7
+ from fastapi.responses import JSONResponse
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from fastapi.staticfiles import StaticFiles
10
+ import uvicorn
11
+ from pathlib import Path
12
+
13
# FastAPI application that hosts both the JSON API and (optionally) the frontend.
app = FastAPI(title="RAWI Voice to Story Generator")

# Allow cross-origin requests from the frontend.
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers for credentialed requests — tighten in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # This can be more restrictive in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# DeepSeek endpoint and key come from the environment (key defaults to empty).
DEEPSEEK_API_URL = os.environ.get("DEEPSEEK_API_URL", "https://api.deepseek.com/v1/chat/completions")
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")

# Whisper model loaded once at import time (tiny/base/small depending on hardware).
model = whisper.load_model("base")
31
+
32
# FastAPI endpoint for direct API access
@app.post("/generate-story")
async def generate_story_api(file: UploadFile = File(...)):
    """Accept an uploaded audio file and return its transcript and story.

    Returns:
        200 with {"transcript": ..., "story": ...} on success,
        500 with {"error": ...} on failure.
    """
    tmp_path = None
    try:
        # Persist the upload to disk because Whisper transcribes from a file path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(await file.read())
            tmp_path = tmp.name

        # Process the audio using the shared pipeline.
        transcript, story = process_audio(tmp_path)

        return JSONResponse({
            "transcript": transcript,
            "story": story
        })
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={"error": str(e)}
        )
    finally:
        # Fix: previously the temp file was removed only on the success path and
        # leaked whenever process_audio raised; always clean it up here.
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
57
+
58
# Function for processing audio (used by both FastAPI and Gradio)
def process_audio(audio_path):
    """Transcribe the file at ``audio_path`` with Whisper, then ask DeepSeek
    to correct transcription artifacts in the resulting Arabic text.

    Parameters:
        audio_path: filesystem path to an audio file readable by Whisper.

    Returns:
        (transcript, story); on any failure returns ("", error_message) —
        this function never raises.
    """
    try:
        # Transcribe using Whisper (forced Arabic).
        result = model.transcribe(audio_path, language="ar")
        text = result.get("text", "")

        # Prompt asking DeepSeek to fix speech-to-text mistakes in the story.
        prompt = f"هذه قصة قصيرة كتبها المستخدم بصوته، من فضلك قم بتصحيح أي أخطاء لغوية أو كلمات غير مفهومة تسبب بيها موديل تحويل الصوت الي نص: {text}"

        headers = {
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": "deepseek-chat",  # use your actual model name
            "messages": [
                {"role": "user", "content": prompt}
            ]
        }

        # Only make the API call if a key is configured.
        if DEEPSEEK_API_KEY:
            # Fix: explicit timeout — without one a stalled DeepSeek call hangs
            # the HTTP request / UI forever.
            response = requests.post(DEEPSEEK_API_URL, json=payload, headers=headers, timeout=60)
            # Fix: non-JSON error bodies (e.g. HTML gateway pages) previously
            # raised here and discarded the transcript too; degrade gracefully.
            try:
                response_json = response.json()
            except ValueError:
                response_json = {"error": response.text}

            if response.status_code == 200 and "choices" in response_json:
                story = response_json["choices"][0]["message"]["content"]
            else:
                story = "حدث خطأ أثناء توليد القصة. تفاصيل: " + str(response_json)
        else:
            story = "تنبيه: لم يتم تكوين مفتاح API. الرجاء تعيين متغير البيئة DEEPSEEK_API_KEY."

        return text, story

    except Exception as e:
        return "", f"حدث خطأ: {str(e)}"
96
+
97
# Thin adapter between the Gradio widget value and the shared audio pipeline.
def gradio_process(audio_file):
    """Return (transcript, story) for the audio supplied by the Gradio widget.

    Accepts either a plain path string or an object exposing ``.name``;
    on failure returns ("", error_message).
    """
    try:
        # Normalize the widget value to a filesystem path.
        if isinstance(audio_file, str):
            path = audio_file
        else:
            path = audio_file.name

        return process_audio(path)

    except Exception as e:
        return "", f"حدث خطأ: {str(e)}"
110
+
111
# Build the Gradio interface: one audio input, one button, two text outputs.
with gr.Blocks(title="RAWI Voice to Story Generator") as demo:
    gr.Markdown("# RAWI Voice to Story Generator")
    gr.Markdown("قم بتسجيل أو تحميل ملف صوتي باللغة العربية وسيقوم النظام بتحويله إلى قصة.")

    with gr.Row():
        voice_input = gr.Audio(label="تسجيل أو تحميل صوت", type="filepath")

    with gr.Row():
        generate_button = gr.Button("توليد القصة")

    with gr.Row():
        transcript_box = gr.Textbox(label="النص المستخرج من التسجيل الصوتي")
        story_box = gr.Textbox(label="القصة المولدة")

    # Wire the button to the adapter around the shared audio pipeline.
    generate_button.click(
        fn=gradio_process,
        inputs=voice_input,
        outputs=[transcript_box, story_box],
    )
131
+
132
# Serve the bundled frontend from the root path when it is present on disk.
# NOTE(review): the Gradio `demo` defined above is never launched nor mounted
# when running under uvicorn, so only the JSON API and static frontend are
# reachable — confirm this is intentional.
front_dir = Path("../front")
if front_dir.exists():
    app.mount("/", StaticFiles(directory=str(front_dir), html=True), name="frontend")

# Run the FastAPI app with uvicorn when executed as a script.
if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)
requirements.txt CHANGED
@@ -3,4 +3,7 @@ openai-whisper==20231117
3
  torch==2.0.1
4
  requests==2.31.0
5
  ffmpeg-python==0.2.0
 
 
 
6
  --extra-index-url https://download.pytorch.org/whl/cpu
 
3
  torch==2.0.1
4
  requests==2.31.0
5
  ffmpeg-python==0.2.0
6
+ fastapi==0.103.1
7
+ uvicorn==0.23.2
8
+ python-multipart==0.0.6
9
  --extra-index-url https://download.pytorch.org/whl/cpu