danicor committed on
Commit
43cc58e
·
verified ·
1 Parent(s): 9faea22

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -4
app.py CHANGED
@@ -7,6 +7,10 @@ import tempfile
7
  import os
8
  import uvicorn
9
  import logging
 
 
 
 
10
 
11
  # تنظیم لاگ
12
  logging.basicConfig(level=logging.INFO)
@@ -27,9 +31,82 @@ logger.info(f"Loading model on {device}")
27
  model = whisper.load_model("large-v3", device=device)
28
  logger.info("Model loaded successfully")
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  @app.get("/")
31
  async def root():
32
- return {"message": "Whisper API is running", "device": device}
 
 
 
 
 
 
 
 
 
 
33
 
34
  @app.post("/transcribe")
35
  async def transcribe_audio(file: UploadFile = File(...)):
@@ -51,6 +128,22 @@ async def transcribe_audio(file: UploadFile = File(...)):
51
  if file_size == 0:
52
  raise HTTPException(status_code=400, detail="Empty file")
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  # تشخیص فرمت فایل
55
  file_ext = os.path.splitext(file.filename)[1].lower()
56
  if not file_ext:
@@ -75,9 +168,20 @@ async def transcribe_audio(file: UploadFile = File(...)):
75
 
76
  text = result["text"].strip()
77
  if not text:
78
- return JSONResponse({"text": "متن شناسایی نشد", "warning": "No speech detected"})
 
 
 
 
79
 
80
- return JSONResponse({"text": text})
 
 
 
 
 
 
 
81
 
82
  except Exception as e:
83
  logger.error(f"Error in transcription: {str(e)}")
@@ -98,8 +202,33 @@ async def transcribe_audio(file: UploadFile = File(...)):
98
  except:
99
  pass
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  if __name__ == "__main__":
102
  uvicorn.run(app, host="0.0.0.0", port=7860, timeout_keep_alive=900)
103
 
104
  if __name__ == "__main__":
105
- uvicorn.run(app, host="0.0.0.0", port=7860, timeout_keep_alive=900)
 
7
  import os
8
  import uvicorn
9
  import logging
10
+ import hashlib
11
+ import json
12
+ import sqlite3
13
+ from datetime import datetime
14
 
15
  # تنظیم لاگ
16
  logging.basicConfig(level=logging.INFO)
 
31
  model = whisper.load_model("large-v3", device=device)
32
  logger.info("Model loaded successfully")
33
 
34
+ # ایجاد دیتابیس کش
35
def init_cache_db():
    """Create the transcription cache table if it does not already exist.

    Opens ``transcription_cache.db`` (a path relative to the current working
    directory) and creates the ``cache`` table, whose rows are keyed by a
    unique ``file_hash``. Calling this repeatedly is harmless because the
    statement uses ``CREATE TABLE IF NOT EXISTS``.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table cannot
            be created.
    """
    conn = sqlite3.connect('transcription_cache.db')
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS cache (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_hash TEXT UNIQUE,
                filename TEXT,
                file_size INTEGER,
                transcription TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        conn.commit()
    finally:
        # Release the file handle even when the DDL statement fails.
        conn.close()
50
+
51
+ # محاسبه هش فایل
52
def calculate_file_hash(content, filename, file_size):
    """Return a hex digest that identifies an uploaded file for cache lookups.

    The digest covers the complete byte content together with the filename
    and size. Hashing only the first and last kilobyte (as was done before)
    lets two different recordings with the same name, size, and identical
    leading/trailing bytes share a key, which would return the wrong cached
    transcription.

    Note: digests differ from those produced by the previous implementation,
    so rows stored under old keys are simply never matched again.

    Args:
        content: Raw bytes of the uploaded file.
        filename: Name of the uploaded file; mixed into the key.
        file_size: Size of the upload in bytes; mixed into the key.

    Returns:
        Hexadecimal SHA-256 digest string.
    """
    digest = hashlib.sha256(content)
    digest.update(f"{filename}_{file_size}_{len(content)}".encode())
    return digest.hexdigest()
56
+
57
+ # جستجو در کش
58
def get_from_cache(file_hash):
    """Look up a previously stored transcription by its file hash.

    The cache is best-effort: database errors are logged and reported as a
    miss instead of being raised to the caller.

    Args:
        file_hash: Digest string used as the cache key.

    Returns:
        The cached transcription text, or ``None`` when no row matches or
        the lookup fails.
    """
    try:
        conn = sqlite3.connect('transcription_cache.db')
        try:
            row = conn.execute(
                'SELECT transcription FROM cache WHERE file_hash = ?',
                (file_hash,),
            ).fetchone()
        finally:
            # Close the connection even when the query raises.
            conn.close()
    except sqlite3.Error as e:
        logger.error(f"Error reading from cache: {e}")
        return None
    return row[0] if row else None
68
+
69
+ # ذخیره در کش
70
def save_to_cache(file_hash, filename, file_size, transcription):
    """Store a transcription in the cache, replacing any row with the same hash.

    The write is best-effort: any failure is logged and swallowed so that a
    cache problem never fails the transcription request itself.

    Args:
        file_hash: Digest string used as the unique cache key.
        filename: Original name of the uploaded file.
        file_size: Size of the uploaded file in bytes.
        transcription: Text to store for this file.
    """
    try:
        conn = sqlite3.connect('transcription_cache.db')
        try:
            conn.execute(
                '''
                INSERT OR REPLACE INTO cache (file_hash, filename, file_size, transcription)
                VALUES (?, ?, ?, ?)
                ''',
                (file_hash, filename, file_size, transcription),
            )
            conn.commit()
        finally:
            # Close the connection even when the insert fails.
            conn.close()
    except Exception as e:
        logger.error(f"Error saving to cache: {e}")
82
+
83
+ # پاک کردن کش قدیمی (بیش از 30 روز)
84
def cleanup_old_cache(max_age_days=30):
    """Delete cache rows older than the retention window.

    The cleanup is best-effort: any failure is logged and swallowed.

    Args:
        max_age_days: Rows whose ``created_at`` is more than this many days
            in the past are removed. Defaults to 30, the previously
            hard-coded retention period.
    """
    try:
        conn = sqlite3.connect('transcription_cache.db')
        try:
            # The date modifier is passed as a bound parameter ("-30 days").
            conn.execute(
                "DELETE FROM cache WHERE created_at < datetime('now', ?)",
                (f"-{int(max_age_days)} days",),
            )
            conn.commit()
        finally:
            # Close the connection even when the delete fails.
            conn.close()
    except Exception as e:
        logger.error(f"Error cleaning cache: {e}")
93
+
94
+ # راه‌اندازی دیتابیس
95
+ init_cache_db()
96
+
97
  @app.get("/")
98
  async def root():
99
+ conn = sqlite3.connect('transcription_cache.db')
100
+ cursor = conn.cursor()
101
+ cursor.execute('SELECT COUNT(*) FROM cache')
102
+ cache_count = cursor.fetchone()[0]
103
+ conn.close()
104
+
105
+ return {
106
+ "message": "Whisper API is running",
107
+ "device": device,
108
+ "cached_files": cache_count
109
+ }
110
 
111
  @app.post("/transcribe")
112
  async def transcribe_audio(file: UploadFile = File(...)):
 
128
  if file_size == 0:
129
  raise HTTPException(status_code=400, detail="Empty file")
130
 
131
+ # محاسبه هش فایل
132
+ file_hash = calculate_file_hash(contents, file.filename, file_size)
133
+ logger.info(f"File hash: {file_hash}")
134
+
135
+ # جستجو در کش
136
+ cached_result = get_from_cache(file_hash)
137
+ if cached_result:
138
+ logger.info("Found in cache, returning cached result")
139
+ return JSONResponse({
140
+ "text": cached_result,
141
+ "from_cache": True,
142
+ "message": "نتیجه از کش بازگردانده شد"
143
+ })
144
+
145
+ logger.info("Not found in cache, processing...")
146
+
147
  # تشخیص فرمت فایل
148
  file_ext = os.path.splitext(file.filename)[1].lower()
149
  if not file_ext:
 
168
 
169
  text = result["text"].strip()
170
  if not text:
171
+ text = "متن شناسایی نشد"
172
+
173
+ # ذخیره در کش
174
+ save_to_cache(file_hash, file.filename, file_size, text)
175
+ logger.info("Result saved to cache")
176
 
177
+ # پاک کردن کش قدیمی
178
+ cleanup_old_cache()
179
+
180
+ return JSONResponse({
181
+ "text": text,
182
+ "from_cache": False,
183
+ "message": "پردازش جدید انجام شد و در کش ذخیره شد"
184
+ })
185
 
186
  except Exception as e:
187
  logger.error(f"Error in transcription: {str(e)}")
 
202
  except:
203
  pass
204
 
205
@app.get("/cache/stats")
async def cache_stats():
    """Return aggregate statistics about the transcription cache.

    Returns:
        Dict with ``total_cached_files`` (row count), ``cached_today`` (rows
        created within the last 24 hours) and ``average_text_length``
        (mean transcription length, truncated to an int; 0 when the cache
        is empty). On failure, a dict with a single ``error`` key holding
        the exception message.
    """
    try:
        conn = sqlite3.connect('transcription_cache.db')
        try:
            cursor = conn.cursor()

            cursor.execute('SELECT COUNT(*) FROM cache')
            total_count = cursor.fetchone()[0]

            # SQL string literals must be single-quoted; double quotes denote
            # identifiers and only work as strings through a legacy SQLite quirk.
            cursor.execute(
                "SELECT COUNT(*) FROM cache WHERE created_at >= datetime('now', '-1 day')"
            )
            today_count = cursor.fetchone()[0]

            # AVG() yields NULL on an empty table, hence the fallback to 0.
            cursor.execute('SELECT AVG(LENGTH(transcription)) FROM cache')
            avg_text_length = cursor.fetchone()[0] or 0
        finally:
            # Close the connection even when one of the queries raises.
            conn.close()

        return {
            "total_cached_files": total_count,
            "cached_today": today_count,
            "average_text_length": int(avg_text_length),
        }
    except Exception as e:
        logger.error(f"Error reading cache stats: {e}")
        return {"error": str(e)}
229
+
230
  if __name__ == "__main__":
231
  uvicorn.run(app, host="0.0.0.0", port=7860, timeout_keep_alive=900)
232
 
233
  if __name__ == "__main__":
234
+ uvicorn.run(app, host="0.0.0.0", port=7860, timeout_keep_alive=300)