walker11 committed on
Commit
76ac4df
·
verified ·
1 Parent(s): 86a9d2d

Upload 4 files

Browse files
Files changed (2) hide show
  1. app.py +14 -50
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import os
2
  import tempfile
3
  import whisper
4
- import requests
5
  import gradio as gr
6
  from fastapi import FastAPI, File, UploadFile
7
  from fastapi.responses import JSONResponse
@@ -11,7 +10,7 @@ import uvicorn
11
  from pathlib import Path
12
 
13
  # Create FastAPI app
14
- app = FastAPI(title="RAWI Voice to Story Generator")
15
 
16
  # Configure CORS to allow requests from frontend
17
  app.add_middleware(
@@ -22,10 +21,6 @@ app.add_middleware(
22
  allow_headers=["*"],
23
  )
24
 
25
- # Load environment variables or use defaults
26
- DEEPSEEK_API_URL = os.environ.get("DEEPSEEK_API_URL", "https://api.deepseek.com/v1/chat/completions")
27
- DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
28
-
29
  # Load Whisper model (can be tiny/base/small depending on hardware)
30
  model = whisper.load_model("base")
31
 
@@ -39,15 +34,14 @@ async def generate_story_api(file: UploadFile = File(...)):
39
  tmp_path = tmp.name
40
 
41
  # Process the audio using our function
42
- transcript, story = process_audio(tmp_path)
43
 
44
  # Clean up temp file
45
  os.remove(tmp_path)
46
 
47
  # Return JSON response
48
  return JSONResponse({
49
- "transcript": transcript,
50
- "story": story
51
  })
52
  except Exception as e:
53
  return JSONResponse(
@@ -56,43 +50,14 @@ async def generate_story_api(file: UploadFile = File(...)):
56
  )
57
 
58
  # Function for processing audio (used by both FastAPI and Gradio)
59
- def process_audio(audio_path):
60
  try:
61
  # Transcribe using Whisper
62
  result = model.transcribe(audio_path, language="ar")
63
  text = result.get("text", "")
64
-
65
- # Send the transcript to DeepSeek API
66
- prompt = f"هذه قصة قصيرة كتبها المستخدم بصوته، من فضلك قم بتصحيح أي أخطاء لغوية أو كلمات غير مفهومة تسبب بيها موديل تحويل الصوت الي نص: {text}"
67
-
68
- headers = {
69
- "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
70
- "Content-Type": "application/json"
71
- }
72
-
73
- payload = {
74
- "model": "deepseek-chat", # use your actual model name
75
- "messages": [
76
- {"role": "user", "content": prompt}
77
- ]
78
- }
79
-
80
- # Only make API call if key exists
81
- if DEEPSEEK_API_KEY:
82
- response = requests.post(DEEPSEEK_API_URL, json=payload, headers=headers)
83
- response_json = response.json()
84
-
85
- if response.status_code == 200 and "choices" in response_json:
86
- story = response_json["choices"][0]["message"]["content"]
87
- else:
88
- story = "حدث خطأ أثناء توليد القصة. تفاصيل: " + str(response_json)
89
- else:
90
- story = "تنبيه: لم يتم تكوين مفتاح API. الرجاء تعيين متغير البيئة DEEPSEEK_API_KEY."
91
-
92
- return text, story
93
-
94
  except Exception as e:
95
- return "", f"حدث خطأ: {str(e)}"
96
 
97
  # Gradio interface wrapper for the model
98
  def gradio_process(audio_file):
@@ -101,32 +66,31 @@ def gradio_process(audio_file):
101
  audio_path = audio_file if isinstance(audio_file, str) else audio_file.name
102
 
103
  # Process the audio
104
- transcript, story = process_audio(audio_path)
105
 
106
- return transcript, story
107
 
108
  except Exception as e:
109
- return "", f"حدث خطأ: {str(e)}"
110
 
111
  # Define Gradio interface
112
- with gr.Blocks(title="RAWI Voice to Story Generator") as demo:
113
- gr.Markdown("# RAWI Voice to Story Generator")
114
- gr.Markdown("قم بتسجيل أو تحميل ملف صوتي باللغة العربية وسيقوم النظام بتحويله إلى قصة.")
115
 
116
  with gr.Row():
117
  audio_input = gr.Audio(label="تسجيل أو تحميل صوت", type="filepath")
118
 
119
  with gr.Row():
120
- submit_btn = gr.Button("توليد القصة")
121
 
122
  with gr.Row():
123
  transcript_output = gr.Textbox(label="النص المستخرج من التسجيل الصوتي")
124
- story_output = gr.Textbox(label="القصة المولدة")
125
 
126
  submit_btn.click(
127
  fn=gradio_process,
128
  inputs=audio_input,
129
- outputs=[transcript_output, story_output],
130
  )
131
 
132
  # Mount static files for frontend if they exist
 
1
  import os
2
  import tempfile
3
  import whisper
 
4
  import gradio as gr
5
  from fastapi import FastAPI, File, UploadFile
6
  from fastapi.responses import JSONResponse
 
10
  from pathlib import Path
11
 
12
  # Create FastAPI app
13
+ app = FastAPI(title="Speech to Text Model")
14
 
15
  # Configure CORS to allow requests from frontend
16
  app.add_middleware(
 
21
  allow_headers=["*"],
22
  )
23
 
 
 
 
 
24
  # Load Whisper model (can be tiny/base/small depending on hardware)
25
  model = whisper.load_model("base")
26
 
 
34
  tmp_path = tmp.name
35
 
36
  # Process the audio using our function
37
+ transcript = transcribe_audio(tmp_path)
38
 
39
  # Clean up temp file
40
  os.remove(tmp_path)
41
 
42
  # Return JSON response
43
  return JSONResponse({
44
+ "transcript": transcript
 
45
  })
46
  except Exception as e:
47
  return JSONResponse(
 
50
  )
51
 
52
  # Function for processing audio (used by both FastAPI and Gradio)
53
+ def transcribe_audio(audio_path):
54
  try:
55
  # Transcribe using Whisper
56
  result = model.transcribe(audio_path, language="ar")
57
  text = result.get("text", "")
58
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  except Exception as e:
60
+ return f"حدث خطأ: {str(e)}"
61
 
62
  # Gradio interface wrapper for the model
63
  def gradio_process(audio_file):
 
66
  audio_path = audio_file if isinstance(audio_file, str) else audio_file.name
67
 
68
  # Process the audio
69
+ transcript = transcribe_audio(audio_path)
70
 
71
+ return transcript
72
 
73
  except Exception as e:
74
+ return f"حدث خطأ: {str(e)}"
75
 
76
  # Define Gradio interface
77
+ with gr.Blocks(title="Speech to Text Model") as demo:
78
+ gr.Markdown("# Speech to Text")
79
+ gr.Markdown("قم بتسجيل أو تحميل ملف صوتي باللغة العربية وسيقوم النظام بتحويله إلى نص.")
80
 
81
  with gr.Row():
82
  audio_input = gr.Audio(label="تسجيل أو تحميل صوت", type="filepath")
83
 
84
  with gr.Row():
85
+ submit_btn = gr.Button("تحويل إلى نص")
86
 
87
  with gr.Row():
88
  transcript_output = gr.Textbox(label="النص المستخرج من التسجيل الصوتي")
 
89
 
90
  submit_btn.click(
91
  fn=gradio_process,
92
  inputs=audio_input,
93
+ outputs=transcript_output,
94
  )
95
 
96
  # Mount static files for frontend if they exist
requirements.txt CHANGED
@@ -1,9 +1,8 @@
1
  gradio>=3.50.2
2
  openai-whisper==20231117
3
  torch==2.0.1
4
- requests==2.31.0
5
- ffmpeg-python==0.2.0
6
  fastapi==0.103.1
7
  uvicorn==0.23.2
8
  python-multipart==0.0.6
 
9
  --extra-index-url https://download.pytorch.org/whl/cpu
 
1
  gradio>=3.50.2
2
  openai-whisper==20231117
3
  torch==2.0.1
 
 
4
  fastapi==0.103.1
5
  uvicorn==0.23.2
6
  python-multipart==0.0.6
7
+ ffmpeg-python==0.2.0
8
  --extra-index-url https://download.pytorch.org/whl/cpu