danicor committed on
Commit
c093af0
·
verified ·
1 Parent(s): 43cc58e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +252 -38
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import FastAPI, File, UploadFile, HTTPException
2
  from fastapi.responses import JSONResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
  import whisper
@@ -11,6 +11,8 @@ import hashlib
11
  import json
12
  import sqlite3
13
  from datetime import datetime
 
 
14
 
15
  # تنظیم لاگ
16
  logging.basicConfig(level=logging.INFO)
@@ -31,7 +33,7 @@ logger.info(f"Loading model on {device}")
31
  model = whisper.load_model("large-v3", device=device)
32
  logger.info("Model loaded successfully")
33
 
34
- # ایجاد دیتابیس کش
35
  def init_cache_db():
36
  conn = sqlite3.connect('transcription_cache.db')
37
  cursor = conn.cursor()
@@ -45,6 +47,22 @@ def init_cache_db():
45
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
46
  )
47
  ''')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  conn.commit()
49
  conn.close()
50
 
@@ -80,12 +98,140 @@ def save_to_cache(file_hash, filename, file_size, transcription):
80
  except Exception as e:
81
  logger.error(f"Error saving to cache: {e}")
82
 
83
- # پاک کردن کش قدیمی (بیش از 30 روز)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  def cleanup_old_cache():
85
  try:
86
  conn = sqlite3.connect('transcription_cache.db')
87
  cursor = conn.cursor()
88
  cursor.execute("DELETE FROM cache WHERE created_at < datetime('now', '-30 days')")
 
89
  conn.commit()
90
  conn.close()
91
  except Exception as e:
@@ -100,16 +246,19 @@ async def root():
100
  cursor = conn.cursor()
101
  cursor.execute('SELECT COUNT(*) FROM cache')
102
  cache_count = cursor.fetchone()[0]
 
 
103
  conn.close()
104
 
105
  return {
106
  "message": "Whisper API is running",
107
  "device": device,
108
- "cached_files": cache_count
 
109
  }
110
 
111
  @app.post("/transcribe")
112
- async def transcribe_audio(file: UploadFile = File(...)):
113
  tmp_file_path = None
114
 
115
  try:
@@ -120,7 +269,9 @@ async def transcribe_audio(file: UploadFile = File(...)):
120
 
121
  contents = await file.read()
122
  file_size = len(contents)
123
- logger.info(f"File read successfully, size: {file_size} bytes")
 
 
124
 
125
  if file_size > 50 * 1024 * 1024:
126
  raise HTTPException(status_code=413, detail="File too large")
@@ -136,52 +287,80 @@ async def transcribe_audio(file: UploadFile = File(...)):
136
  cached_result = get_from_cache(file_hash)
137
  if cached_result:
138
  logger.info("Found in cache, returning cached result")
 
 
139
  return JSONResponse({
140
  "text": cached_result,
141
  "from_cache": True,
142
  "message": "نتیجه از کش بازگردانده شد"
143
  })
144
 
145
- logger.info("Not found in cache, processing...")
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  # تشخیص فرمت فایل
148
  file_ext = os.path.splitext(file.filename)[1].lower()
149
  if not file_ext:
150
  file_ext = ".wav"
151
 
 
152
  with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp_file:
153
  tmp_file.write(contents)
154
  tmp_file_path = tmp_file.name
155
 
156
  logger.info(f"Temp file created: {tmp_file_path}")
157
 
158
- result = model.transcribe(
159
- tmp_file_path,
160
- fp16=False if device == "cpu" else True,
161
- language=None,
162
- task="transcribe",
163
- verbose=False,
164
- word_timestamps=False
165
- )
166
-
167
- logger.info("Transcription completed")
168
-
169
- text = result["text"].strip()
170
- if not text:
171
- text = "متن شناسایی نشد"
172
-
173
- # ذخیره در کش
174
- save_to_cache(file_hash, file.filename, file_size, text)
175
- logger.info("Result saved to cache")
176
 
177
- # پاک کردن کش قدیمی
178
- cleanup_old_cache()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
- return JSONResponse({
181
- "text": text,
182
- "from_cache": False,
183
- "message": "پردازش جدید انجام شد و در کش ذخیره شد"
184
- })
 
 
 
 
 
 
 
 
185
 
186
  except Exception as e:
187
  logger.error(f"Error in transcription: {str(e)}")
@@ -195,13 +374,47 @@ async def transcribe_audio(file: UploadFile = File(...)):
195
  raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
196
 
197
  finally:
198
- if tmp_file_path and os.path.exists(tmp_file_path):
 
199
  try:
200
  os.unlink(tmp_file_path)
201
  logger.info(f"Temp file deleted: {tmp_file_path}")
202
  except:
203
  pass
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  @app.get("/cache/stats")
206
  async def cache_stats():
207
  try:
@@ -217,18 +430,19 @@ async def cache_stats():
217
  cursor.execute('SELECT AVG(LENGTH(transcription)) FROM cache')
218
  avg_text_length = cursor.fetchone()[0] or 0
219
 
 
 
 
220
  conn.close()
221
 
222
  return {
223
  "total_cached_files": total_count,
224
  "cached_today": today_count,
225
- "average_text_length": int(avg_text_length)
 
226
  }
227
  except Exception as e:
228
  return {"error": str(e)}
229
 
230
  if __name__ == "__main__":
231
- uvicorn.run(app, host="0.0.0.0", port=7860, timeout_keep_alive=900)
232
-
233
- if __name__ == "__main__":
234
- uvicorn.run(app, host="0.0.0.0", port=7860, timeout_keep_alive=300)
 
1
+ from fastapi import FastAPI, File, UploadFile, HTTPException, BackgroundTasks
2
  from fastapi.responses import JSONResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
  import whisper
 
11
  import json
12
  import sqlite3
13
  from datetime import datetime
14
+ import threading
15
+ import time
16
 
17
  # تنظیم لاگ
18
  logging.basicConfig(level=logging.INFO)
 
33
  model = whisper.load_model("large-v3", device=device)
34
  logger.info("Model loaded successfully")
35
 
36
+ # ایجاد دیتابیس کش و وضعیت
37
  def init_cache_db():
38
  conn = sqlite3.connect('transcription_cache.db')
39
  cursor = conn.cursor()
 
47
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
48
  )
49
  ''')
50
+
51
+ # جدول جدید برای ردیابی وضعیت پردازش
52
+ cursor.execute('''
53
+ CREATE TABLE IF NOT EXISTS processing_status (
54
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
55
+ file_hash TEXT UNIQUE,
56
+ filename TEXT,
57
+ file_size INTEGER,
58
+ status TEXT DEFAULT 'processing',
59
+ progress INTEGER DEFAULT 0,
60
+ estimated_time INTEGER DEFAULT 0,
61
+ started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
62
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
63
+ )
64
+ ''')
65
+
66
  conn.commit()
67
  conn.close()
68
 
 
98
  except Exception as e:
99
  logger.error(f"Error saving to cache: {e}")
100
 
101
# Look up the current processing status of a file by its content hash.
def get_processing_status(file_hash):
    """Return the processing status for *file_hash*, or None.

    The returned dict has keys 'status', 'progress', 'estimated_time'
    (minutes) and 'elapsed_minutes' (whole minutes since the row's
    started_at timestamp, computed in SQL via julianday arithmetic).
    Returns None when the hash is unknown or the database is unavailable.
    """
    try:
        conn = sqlite3.connect('transcription_cache.db')
        try:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT status, progress, estimated_time,
                       (julianday('now') - julianday(started_at)) * 24 * 60 as elapsed_minutes
                FROM processing_status WHERE file_hash = ?
            ''', (file_hash,))
            row = cursor.fetchone()
        finally:
            # Fixed: the connection used to leak when the query raised.
            conn.close()
        if row:
            return {
                'status': row[0],
                'progress': row[1],
                'estimated_time': row[2],
                'elapsed_minutes': int(row[3])
            }
        return None
    except Exception:
        # Fixed: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit. A missing table / locked DB just means "no status".
        return None
123
+
124
# Update selected fields of an existing processing-status row.
def update_processing_status(file_hash, status=None, progress=None, estimated_time=None):
    """Update the processing_status row for *file_hash*.

    Only the arguments that are not None are written; updated_at is always
    refreshed to CURRENT_TIMESTAMP. Errors are logged and swallowed, since
    status tracking is best-effort bookkeeping.
    """
    try:
        assignments = []
        params = []

        # Fixed: was `if status:`, which silently skipped an empty string.
        if status is not None:
            assignments.append("status = ?")
            params.append(status)
        if progress is not None:
            assignments.append("progress = ?")
            params.append(progress)
        if estimated_time is not None:
            assignments.append("estimated_time = ?")
            params.append(estimated_time)

        assignments.append("updated_at = CURRENT_TIMESTAMP")
        params.append(file_hash)

        conn = sqlite3.connect('transcription_cache.db')
        try:
            conn.execute(
                f"UPDATE processing_status SET {', '.join(assignments)} WHERE file_hash = ?",
                params,
            )
            conn.commit()
        finally:
            # Fixed: the connection used to leak when execute/commit raised.
            conn.close()
    except Exception as e:
        logger.error(f"Error updating status: {e}")
152
+
153
# Register a newly submitted file as "processing".
def add_processing_status(file_hash, filename, file_size, estimated_time):
    """Insert (or reset) the processing_status row for *file_hash*.

    INSERT OR REPLACE means re-submitting the same file restarts its status
    at 'processing' / progress 0. Errors are logged and swallowed.
    """
    try:
        conn = sqlite3.connect('transcription_cache.db')
        try:
            conn.execute('''
                INSERT OR REPLACE INTO processing_status
                (file_hash, filename, file_size, status, progress, estimated_time)
                VALUES (?, ?, ?, 'processing', 0, ?)
            ''', (file_hash, filename, file_size, estimated_time))
            conn.commit()
        finally:
            # Fixed: the connection used to leak when the INSERT raised.
            conn.close()
    except Exception as e:
        logger.error(f"Error adding processing status: {e}")
167
+
168
# Drop the status row once a file's result is available (or stale).
def remove_processing_status(file_hash):
    """Delete the processing_status row for *file_hash*, if present.

    Best-effort: errors are logged and swallowed.
    """
    try:
        conn = sqlite3.connect('transcription_cache.db')
        try:
            conn.execute('DELETE FROM processing_status WHERE file_hash = ?', (file_hash,))
            conn.commit()
        finally:
            # Fixed: the connection used to leak when the DELETE raised.
            conn.close()
    except Exception as e:
        logger.error(f"Error removing processing status: {e}")
178
+
179
# Rough wall-clock estimate for transcribing a file of the given size.
def estimate_processing_time(file_size_mb):
    """Estimate transcription time in minutes.

    Heuristic: roughly 30 seconds per MB on CPU, truncated to whole
    minutes, plus a one-minute safety margin.
    """
    seconds = 30 * file_size_mb
    whole_minutes = int(seconds / 60)
    return whole_minutes + 1
184
+
185
# Background worker: transcribe a large upload and cache the result.
def background_transcription(file_path, file_hash, filename, file_size):
    """Run Whisper on *file_path* and store the transcription in the cache.

    Progress milestones written to processing_status: 10 (started),
    80 (model finished), 100/'completed' (cached). On any failure the row
    is marked 'error'. The temp file is always deleted afterwards.
    """
    try:
        # Fixed: both log f-strings had lost their placeholder and logged
        # the literal text "(unknown)" instead of the filename.
        logger.info(f"Starting background transcription for {filename}")

        update_processing_status(file_hash, status='processing', progress=10)

        result = model.transcribe(
            file_path,
            fp16=False if device == "cpu" else True,
            language=None,
            task="transcribe",
            verbose=False,
            word_timestamps=False
        )

        update_processing_status(file_hash, progress=80)

        text = result["text"].strip()
        if not text:
            text = "متن شناسایی نشد"

        # Persist to the cache table so future uploads of the same file
        # (and /status polls) get an instant answer.
        save_to_cache(file_hash, filename, file_size, text)

        update_processing_status(file_hash, status='completed', progress=100)

        logger.info(f"Background transcription completed for {filename}")

    except Exception as e:
        logger.error(f"Error in background transcription: {e}")
        update_processing_status(file_hash, status='error', progress=0)

    finally:
        # Always remove the temp file; ignore a race where it is already gone.
        if os.path.exists(file_path):
            try:
                os.unlink(file_path)
            except OSError:
                # Fixed: was a bare `except:`; only filesystem errors are expected here.
                pass
227
+
228
+ # پاک کردن کش قدیمی
229
  def cleanup_old_cache():
230
  try:
231
  conn = sqlite3.connect('transcription_cache.db')
232
  cursor = conn.cursor()
233
  cursor.execute("DELETE FROM cache WHERE created_at < datetime('now', '-30 days')")
234
+ cursor.execute("DELETE FROM processing_status WHERE started_at < datetime('now', '-1 days')")
235
  conn.commit()
236
  conn.close()
237
  except Exception as e:
 
246
  cursor = conn.cursor()
247
  cursor.execute('SELECT COUNT(*) FROM cache')
248
  cache_count = cursor.fetchone()[0]
249
+ cursor.execute('SELECT COUNT(*) FROM processing_status WHERE status = "processing"')
250
+ processing_count = cursor.fetchone()[0]
251
  conn.close()
252
 
253
  return {
254
  "message": "Whisper API is running",
255
  "device": device,
256
+ "cached_files": cache_count,
257
+ "currently_processing": processing_count
258
  }
259
 
260
  @app.post("/transcribe")
261
+ async def transcribe_audio(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
262
  tmp_file_path = None
263
 
264
  try:
 
269
 
270
  contents = await file.read()
271
  file_size = len(contents)
272
+ file_size_mb = file_size / (1024 * 1024)
273
+
274
+ logger.info(f"File read successfully, size: {file_size} bytes ({file_size_mb:.1f} MB)")
275
 
276
  if file_size > 50 * 1024 * 1024:
277
  raise HTTPException(status_code=413, detail="File too large")
 
287
  cached_result = get_from_cache(file_hash)
288
  if cached_result:
289
  logger.info("Found in cache, returning cached result")
290
+ # حذف وضعیت پردازش اگر وجود دارد
291
+ remove_processing_status(file_hash)
292
  return JSONResponse({
293
  "text": cached_result,
294
  "from_cache": True,
295
  "message": "نتیجه از کش بازگردانده شد"
296
  })
297
 
298
+ # بررسی وضعیت پردازش فعلی
299
+ processing_status = get_processing_status(file_hash)
300
+ if processing_status:
301
+ logger.info("File is currently being processed")
302
+ return JSONResponse({
303
+ "status": "processing",
304
+ "progress": processing_status['progress'],
305
+ "estimated_time": processing_status['estimated_time'],
306
+ "elapsed_minutes": processing_status['elapsed_minutes'],
307
+ "message": f"فایل در حال پردازش است. لطفا {processing_status['estimated_time'] - processing_status['elapsed_minutes']} دقیقه صبر کنید"
308
+ })
309
+
310
+ logger.info("Starting new processing...")
311
 
312
  # تشخیص فرمت فایل
313
  file_ext = os.path.splitext(file.filename)[1].lower()
314
  if not file_ext:
315
  file_ext = ".wav"
316
 
317
+ # ذخیره فایل موقت
318
  with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp_file:
319
  tmp_file.write(contents)
320
  tmp_file_path = tmp_file.name
321
 
322
  logger.info(f"Temp file created: {tmp_file_path}")
323
 
324
+ # تخمین زمان پردازش
325
+ estimated_time = estimate_processing_time(file_size_mb)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
 
327
+ # فایل کوچک (کمتر از 5MB) - پردازش فوری
328
+ if file_size_mb < 5:
329
+ result = model.transcribe(
330
+ tmp_file_path,
331
+ fp16=False if device == "cpu" else True,
332
+ language=None,
333
+ task="transcribe",
334
+ verbose=False,
335
+ word_timestamps=False
336
+ )
337
+
338
+ text = result["text"].strip()
339
+ if not text:
340
+ text = "متن شناسایی نشد"
341
+
342
+ # ذخیره در کش
343
+ save_to_cache(file_hash, file.filename, file_size, text)
344
+
345
+ return JSONResponse({
346
+ "text": text,
347
+ "from_cache": False,
348
+ "message": "پردازش جدید انجام شد و در کش ذخیره شد"
349
+ })
350
 
351
+ else:
352
+ # فایل بزرگ - پردازش پس‌زمینه
353
+ add_processing_status(file_hash, file.filename, file_size, estimated_time)
354
+
355
+ # شروع پردازش پس‌زمینه
356
+ background_tasks.add_task(background_transcription, tmp_file_path, file_hash, file.filename, file_size)
357
+
358
+ return JSONResponse({
359
+ "status": "processing_started",
360
+ "estimated_time": estimated_time,
361
+ "file_hash": file_hash,
362
+ "message": f"پردازش شروع شد. حدود {estimated_time} دقیقه طول می‌کشد. می‌توانید بعدا نتیجه را بررسی کنید"
363
+ })
364
 
365
  except Exception as e:
366
  logger.error(f"Error in transcription: {str(e)}")
 
374
  raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
375
 
376
  finally:
377
+ # فقط فایل‌های کوچک را فوری پاک کن
378
+ if tmp_file_path and os.path.exists(tmp_file_path) and file_size < 5 * 1024 * 1024:
379
  try:
380
  os.unlink(tmp_file_path)
381
  logger.info(f"Temp file deleted: {tmp_file_path}")
382
  except:
383
  pass
384
 
385
@app.get("/status/{file_hash}")
async def check_status(file_hash: str):
    """Poll the processing state of a previously submitted file hash."""

    # A cached transcription means the work already finished: clean up any
    # leftover status row and hand back the text.
    cached = get_from_cache(file_hash)
    if cached:
        remove_processing_status(file_hash)
        return JSONResponse({
            "status": "completed",
            "text": cached,
            "from_cache": True,
            "message": "پردازش تکمیل شده و نتیجه آماده است"
        })

    # Still in flight: report progress plus a remaining-time estimate
    # (clamped at zero so an overrun never shows a negative number).
    state = get_processing_status(file_hash)
    if state:
        remaining = max(0, state['estimated_time'] - state['elapsed_minutes'])
        return JSONResponse({
            "status": state['status'],
            "progress": state['progress'],
            "elapsed_minutes": state['elapsed_minutes'],
            "estimated_time": state['estimated_time'],
            "remaining_time": remaining,
            "message": f"در حال پردازش... حدود {remaining} دقیقه باقی مانده"
        })

    # Neither cached nor being processed.
    return JSONResponse({
        "status": "not_found",
        "message": "فایل یافت نشد"
    })
417
+
418
  @app.get("/cache/stats")
419
  async def cache_stats():
420
  try:
 
430
  cursor.execute('SELECT AVG(LENGTH(transcription)) FROM cache')
431
  avg_text_length = cursor.fetchone()[0] or 0
432
 
433
+ cursor.execute('SELECT COUNT(*) FROM processing_status WHERE status = "processing"')
434
+ processing_count = cursor.fetchone()[0]
435
+
436
  conn.close()
437
 
438
  return {
439
  "total_cached_files": total_count,
440
  "cached_today": today_count,
441
+ "average_text_length": int(avg_text_length),
442
+ "currently_processing": processing_count
443
  }
444
  except Exception as e:
445
  return {"error": str(e)}
446
 
447
  if __name__ == "__main__":
448
+ uvicorn.run(app, host="0.0.0.0", port=7860, timeout_keep_alive=900)