saeez mohz committed on
Commit
68c67f5
·
verified ·
1 Parent(s): 8e0990c

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +230 -0
main.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, HTTPException
2
+ from fastapi.responses import JSONResponse
3
+ import torch
4
+ from transformers import pipeline
5
+ import json
6
+ import os
7
+ from difflib import SequenceMatcher
8
+ from typing import Dict, Any, Optional
9
+ import tempfile
10
+
11
# ASGI application object; the keyword arguments are the API's metadata.
app = FastAPI(
    version="1.0.0",
    title="Quranic Verse Recognition API",
    description="POST an audio file to /recognize → get JSON with transcription and best-matching Surah/Ayah.",
)
16
+
17
# CPU only on free tier
device = -1

# Speech-recognition pipeline built once at import time and reused by every
# request (model downloads/caches automatically on first run).
pipe = pipeline(
    task="automatic-speech-recognition",
    model="tarteel-ai/whisper-tiny-ar-quran",
    device=device,
)
26
+
27
# Standard Surah names, listed in order: position N (1-based) is Surah N.
_SURAH_NAMES_IN_ORDER = (
    # 1–10
    "Al-Fatiha (الفاتحة)",
    "Al-Baqarah (البقرة)",
    "Aal-E-Imran (آل عمران)",
    "An-Nisa (النساء)",
    "Al-Maidah (المائدة)",
    "Al-An'am (الأنعام)",
    "Al-A'raf (الأعراف)",
    "Al-Anfal (الأنفال)",
    "At-Tawbah (التوبة)",
    "Yunus (يونس)",
    # 11–20
    "Hud (هود)",
    "Yusuf (يوسف)",
    "Ar-Ra'd (الرعد)",
    "Ibrahim (إبراهيم)",
    "Al-Hijr (الحجر)",
    "An-Nahl (النحل)",
    "Al-Isra (الإسراء)",
    "Al-Kahf (الكهف)",
    "Maryam (مريم)",
    "Ta-Ha (طه)",
    # 21–30
    "Al-Anbiya (الأنبياء)",
    "Al-Hajj (الحج)",
    "Al-Mu'minun (المؤمنون)",
    "An-Nur (النور)",
    "Al-Furqan (الفرقان)",
    "Ash-Shu'ara (الشعراء)",
    "An-Naml (النمل)",
    "Al-Qasas (القصص)",
    "Al-Ankabut (العنكبوت)",
    "Ar-Rum (الروم)",
    # 31–40
    "Luqman (لقمان)",
    "As-Sajdah (السجدة)",
    "Al-Ahzab (الأحزاب)",
    "Saba (سبأ)",
    "Fatir (فاطر)",
    "Ya-Sin (يس)",
    "As-Saffat (الصافات)",
    "Sad (ص)",
    "Az-Zumar (الزمر)",
    "Ghafir (غافر)",
    # 41–50
    "Fussilat (فصلت)",
    "Ash-Shura (الشورى)",
    "Az-Zukhruf (الزخرف)",
    "Ad-Dukhkhan (الدخان)",
    "Al-Jathiya (الجاثية)",
    "Al-Ahqaf (الأحقاف)",
    "Muhammad (محمد)",
    "Al-Fath (الفتح)",
    "Al-Hujurat (الحجرات)",
    "Qaf (ق)",
    # 51–60
    "Adh-Dhariyat (الذاريات)",
    "At-Tur (الطور)",
    "An-Najm (النجم)",
    "Al-Qamar (القمر)",
    "Ar-Rahman (الرحمن)",
    "Al-Waqi'ah (الواقعة)",
    "Al-Hadid (الحديد)",
    "Al-Mujadila (المجادلة)",
    "Al-Hashr (الحشر)",
    "Al-Mumtahina (الممتحنة)",
    # 61–70
    "As-Saff (الصف)",
    "Al-Jumu'ah (الجمعة)",
    "Al-Munafiqoon (المنافقون)",
    "At-Taghabun (التغابن)",
    "At-Talaq (الطلاق)",
    "At-Tahrim (التحريم)",
    "Al-Mulk (الملك)",
    "Al-Qalam (القلم)",
    "Al-Haqqah (الحاقة)",
    "Al-Ma'arij (المعارج)",
    # 71–80
    "Nooh (نوح)",
    "Al-Jinn (الجن)",
    "Al-Muzzammil (المزمل)",
    "Al-Muddathir (المدثر)",
    "Al-Qiyamah (القيامة)",
    "Al-Insan (الإنسان)",
    "Al-Mursalat (المرسلات)",
    "An-Naba (النبأ)",
    "An-Nazi'at (النازعات)",
    "Abasa (عبس)",
    # 81–90
    "At-Takwir (التكوير)",
    "Al-Infitar (الإنفطار)",
    "Al-Mutaffifin (المطففين)",
    "Al-Inshiqaq (الإنشقاق)",
    "Al-Buruj (البروج)",
    "At-Tariq (الطارق)",
    "Al-A'la (الأعلى)",
    "Al-Ghashiyah (الغاشية)",
    "Al-Fajr (الفجر)",
    "Al-Balad (البلد)",
    # 91–100
    "Ash-Shams (الشمس)",
    "Al-Lail (الليل)",
    "Ad-Duha (الضحى)",
    "Ash-Sharh (الشرح)",
    "At-Tin (التين)",
    "Al-Alaq (العلق)",
    "Al-Qadr (القدر)",
    "Al-Bayyina (البينة)",
    "Az-Zalzalah (الزلزلة)",
    "Al-Adiyat (العاديات)",
    # 101–110
    "Al-Qari'ah (القارعة)",
    "At-Takathur (التكاثر)",
    "Al-Asr (العصر)",
    "Al-Humazah (الهمزة)",
    "Al-Fil (الفيل)",
    "Quraish (قريش)",
    "Al-Ma'un (الماعون)",
    "Al-Kawthar (الكوثر)",
    "Al-Kafirun (الكافرون)",
    "An-Nasr (النصر)",
    # 111–114
    "Al-Masad (المسد)",
    "Al-Ikhlas (الإخلاص)",
    "Al-Falaq (الفلق)",
    "An-Nas (الناس)",
)

# Surah number (1–114) -> "Transliteration (Arabic)" display name.
surah_names = dict(enumerate(_SURAH_NAMES_IN_ORDER, start=1))
144
+
145
# Pre-load all verses at startup so each request only does in-memory matching.
surahs_dir = "surahs_json_files"
if not os.path.isdir(surahs_dir):
    raise FileNotFoundError("Missing 'surahs_json_files/' folder. Upload it from the original repo.")


def _load_verses(directory, names):
    """Read every ``<number>_*.json`` surah file found in *directory*.

    Args:
        directory: Folder containing the per-surah JSON files.
        names: Mapping of surah number to display name; numbers missing from
            it fall back to ``"Surah <number>"``.

    Returns:
        A ``(verses, surah_count)`` pair. ``verses`` is a list of
        ``(surah_number, surah_name, ayah_number, verse_text)`` tuples and
        ``surah_count`` is the number of files that were actually loaded.
    """
    numbered_files = []
    for filename in os.listdir(directory):
        if not filename.endswith(".json"):
            continue
        try:
            surah_number = int(filename.split("_")[0])
        except ValueError:
            # Name does not start with "<number>_": not a surah file, skip it.
            continue
        numbered_files.append((surah_number, filename))

    verses = []
    # Sort numerically so surah 2 precedes surah 10 even when the file names
    # are not zero-padded.
    for surah_number, filename in sorted(numbered_files):
        surah_name = names.get(surah_number, f"Surah {surah_number}")
        with open(os.path.join(directory, filename), "r", encoding="utf-8") as f:
            data = json.load(f)
        # Number ayahs by their position in the file *before* filtering, so an
        # entry without "text" cannot shift the numbers of the ayahs after it.
        for ayah_number, ayah in enumerate(data.get("ayahs", []), start=1):
            if "text" in ayah:
                verses.append((surah_number, surah_name, ayah_number, ayah["text"]))
    return verses, len(numbered_files)


all_verses, _loaded_surah_count = _load_verses(surahs_dir, surah_names)
print(f"Loaded {len(all_verses)} verses from {_loaded_surah_count} surahs.")
170
+
171
def normalize_text(text: str) -> str:
    """Return *text* with surrounding whitespace removed and every internal
    run of whitespace collapsed to a single space."""
    # str.split() with no separator already ignores leading/trailing whitespace.
    tokens = text.split()
    return " ".join(tokens)
173
+
174
def find_best_verse(transcription: str, threshold: float = 0.75) -> Dict[str, Any]:
    """Find the pre-loaded verse most similar to *transcription*.

    Similarity is ``difflib.SequenceMatcher(...).ratio()`` between the
    whitespace-normalized transcription and each whitespace-normalized verse
    in ``all_verses``. On ties the verse that comes first in ``all_verses``
    wins.

    Args:
        transcription: Text produced by the speech recognizer.
        threshold: Minimum similarity (0.0–1.0) for a match to be reported as
            confident. Defaults to 0.75.

    Returns:
        * ``{"error": "Empty transcription"}`` when the transcription is blank.
        * A dict with ``surah_number``, ``surah_name``, ``ayah_number``,
          ``verse_text`` and ``similarity`` when the best match reaches
          *threshold*.
        * Otherwise a dict with ``error``, ``best_similarity`` and
          ``possible_match`` (the best candidate, or ``None``).
    """
    transcription_norm = normalize_text(transcription)
    if not transcription_norm:
        return {"error": "Empty transcription"}

    best_ratio = 0.0
    best_match: Optional[Dict[str, Any]] = None

    for surah_number, surah_name, ayah_number, verse_text in all_verses:
        matcher = SequenceMatcher(None, transcription_norm, normalize_text(verse_text))

        # real_quick_ratio() and quick_ratio() are cheap upper bounds on
        # ratio(). A verse whose upper bound cannot beat the current best can
        # never replace it, so the expensive ratio() is skipped without
        # changing the result.
        if matcher.real_quick_ratio() <= best_ratio or matcher.quick_ratio() <= best_ratio:
            continue

        ratio = matcher.ratio()
        if ratio > best_ratio:
            best_ratio = ratio
            best_match = {
                "surah_number": surah_number,
                "surah_name": surah_name,
                "ayah_number": ayah_number,
                "verse_text": verse_text,
                "similarity": round(ratio, 4),
            }

    if best_match and best_ratio >= threshold:
        return best_match

    return {
        "error": "No confident match found",
        "best_similarity": round(best_ratio, 4) if best_match else 0.0,
        "possible_match": best_match,
    }
204
+
205
@app.get("/")
def root():
    """Return a short status message that points clients at /recognize."""
    status = {"message": "Quranic Verse Recognition API running. POST audio to /recognize"}
    return status
208
+
209
@app.post("/recognize")
async def recognize(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and match it against the verses.

    Args:
        file: The uploaded file; its content type must start with ``audio/``.

    Returns:
        A JSON response containing the output of ``find_best_verse`` plus a
        ``transcription`` key holding the raw recognized text.

    Raises:
        HTTPException: 400 if the upload is not audio or is empty; 500 if the
            speech-recognition pipeline fails.
    """
    if not file.content_type or not file.content_type.startswith("audio/"):
        raise HTTPException(status_code=400, detail="File must be an audio file")

    contents = await file.read()
    if not contents:
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty")

    # The client may omit the filename, so guard against None before splitext.
    suffix = os.path.splitext(file.filename or "")[1] or ".wav"

    # Save to a temp file (pipeline accepts a file path directly). The file is
    # created with delete=False, so everything after creation sits inside
    # try/finally to guarantee it is removed even if the write itself fails.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    tmp_path = tmp.name
    try:
        with tmp:
            tmp.write(contents)
        # NOTE(review): this call is synchronous inside an async handler —
        # confirm whether blocking the event loop here is acceptable.
        try:
            transcription = pipe(tmp_path)["text"]
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Transcription error: {str(e)}") from e
    finally:
        os.unlink(tmp_path)

    result = find_best_verse(transcription)
    result["transcription"] = transcription

    return JSONResponse(content=result)