Spaces:

sae8d
/

bayan-ai

Sleeping

App Files Files Community

saeez mohz committed on Feb 6

Commit

68c67f5

verified ·

1 Parent(s): 8e0990c

Create main.py

Browse files

Files changed (1) hide show

main.py +230 -0

main.py ADDED Viewed

	@@ -0,0 +1,230 @@

+from fastapi import FastAPI, UploadFile, File, HTTPException
+from fastapi.responses import JSONResponse
+import torch
+from transformers import pipeline
+import json
+import os
+from difflib import SequenceMatcher
+from typing import Dict, Any, Optional
+import tempfile
+app = FastAPI(  # application object; the @app.get / @app.post decorators in this file register routes on it
+    title="Quranic Verse Recognition API",  # title, description and version are passed to FastAPI as API metadata
+    description="POST an audio file to /recognize → get JSON with transcription and best-matching Surah/Ayah.",
+    version="1.0.0"
+)
+# CPU only on free tier
+device = -1
+# Load Whisper pipeline (model downloads/caches automatically on first run)
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model="tarteel-ai/whisper-tiny-ar-quran",
+    device=device,
+)
+# Display names for surahs 1–114, keyed by surah number; each value is "<transliteration> (<Arabic name>)"
+surah_names = {
+    1: "Al-Fatiha (الفاتحة)",
+    2: "Al-Baqarah (البقرة)",
+    3: "Aal-E-Imran (آل عمران)",
+    4: "An-Nisa (النساء)",
+    5: "Al-Maidah (المائدة)",
+    6: "Al-An'am (الأنعام)",
+    7: "Al-A'raf (الأعراف)",
+    8: "Al-Anfal (الأنفال)",
+    9: "At-Tawbah (التوبة)",
+    10: "Yunus (يونس)",
+    11: "Hud (هود)",
+    12: "Yusuf (يوسف)",
+    13: "Ar-Ra'd (الرعد)",
+    14: "Ibrahim (إبراهيم)",
+    15: "Al-Hijr (الحجر)",
+    16: "An-Nahl (النحل)",
+    17: "Al-Isra (الإسراء)",
+    18: "Al-Kahf (الكهف)",
+    19: "Maryam (مريم)",
+    20: "Ta-Ha (طه)",
+    21: "Al-Anbiya (الأنبياء)",
+    22: "Al-Hajj (الحج)",
+    23: "Al-Mu'minun (المؤمنون)",
+    24: "An-Nur (النور)",
+    25: "Al-Furqan (الفرقان)",
+    26: "Ash-Shu'ara (الشعراء)",
+    27: "An-Naml (النمل)",
+    28: "Al-Qasas (القصص)",
+    29: "Al-Ankabut (العنكبوت)",
+    30: "Ar-Rum (الروم)",
+    31: "Luqman (لقمان)",
+    32: "As-Sajdah (السجدة)",
+    33: "Al-Ahzab (الأحزاب)",
+    34: "Saba (سبأ)",
+    35: "Fatir (فاطر)",
+    36: "Ya-Sin (يس)",
+    37: "As-Saffat (الصافات)",
+    38: "Sad (ص)",
+    39: "Az-Zumar (الزمر)",
+    40: "Ghafir (غافر)",
+    41: "Fussilat (فصلت)",
+    42: "Ash-Shura (الشورى)",
+    43: "Az-Zukhruf (الزخرف)",
+    44: "Ad-Dukhkhan (الدخان)",
+    45: "Al-Jathiya (الجاثية)",
+    46: "Al-Ahqaf (الأحقاف)",
+    47: "Muhammad (محمد)",
+    48: "Al-Fath (الفتح)",
+    49: "Al-Hujurat (الحجرات)",
+    50: "Qaf (ق)",
+    51: "Adh-Dhariyat (الذاريات)",
+    52: "At-Tur (الطور)",
+    53: "An-Najm (النجم)",
+    54: "Al-Qamar (القمر)",
+    55: "Ar-Rahman (الرحمن)",
+    56: "Al-Waqi'ah (الواقعة)",
+    57: "Al-Hadid (الحديد)",
+    58: "Al-Mujadila (المجادلة)",
+    59: "Al-Hashr (الحشر)",
+    60: "Al-Mumtahina (الممتحنة)",
+    61: "As-Saff (الصف)",
+    62: "Al-Jumu'ah (الجمعة)",
+    63: "Al-Munafiqoon (المنافقون)",
+    64: "At-Taghabun (التغابن)",
+    65: "At-Talaq (الطلاق)",
+    66: "At-Tahrim (التحريم)",
+    67: "Al-Mulk (الملك)",
+    68: "Al-Qalam (القلم)",
+    69: "Al-Haqqah (الحاقة)",
+    70: "Al-Ma'arij (المعارج)",
+    71: "Nooh (نوح)",
+    72: "Al-Jinn (الجن)",
+    73: "Al-Muzzammil (المزمل)",
+    74: "Al-Muddathir (المدثر)",
+    75: "Al-Qiyamah (القيامة)",
+    76: "Al-Insan (الإنسان)",
+    77: "Al-Mursalat (المرسلات)",
+    78: "An-Naba (النبأ)",
+    79: "An-Nazi'at (النازعات)",
+    80: "Abasa (عبس)",
+    81: "At-Takwir (التكوير)",
+    82: "Al-Infitar (الإنفطار)",
+    83: "Al-Mutaffifin (المطففين)",
+    84: "Al-Inshiqaq (الإنشقاق)",
+    85: "Al-Buruj (البروج)",
+    86: "At-Tariq (الطارق)",
+    87: "Al-A'la (الأعلى)",
+    88: "Al-Ghashiyah (الغاشية)",
+    89: "Al-Fajr (الفجر)",
+    90: "Al-Balad (البلد)",
+    91: "Ash-Shams (الشمس)",
+    92: "Al-Lail (الليل)",
+    93: "Ad-Duha (الضحى)",
+    94: "Ash-Sharh (الشرح)",
+    95: "At-Tin (التين)",
+    96: "Al-Alaq (العلق)",
+    97: "Al-Qadr (القدر)",
+    98: "Al-Bayyina (البينة)",
+    99: "Az-Zalzalah (الزلزلة)",
+    100: "Al-Adiyat (العاديات)",
+    101: "Al-Qari'ah (القارعة)",
+    102: "At-Takathur (التكاثر)",
+    103: "Al-Asr (العصر)",
+    104: "Al-Humazah (الهمزة)",
+    105: "Al-Fil (الفيل)",
+    106: "Quraish (قريش)",
+    107: "Al-Ma'un (الماعون)",
+    108: "Al-Kawthar (الكوثر)",
+    109: "Al-Kafirun (الكافرون)",
+    110: "An-Nasr (النصر)",
+    111: "Al-Masad (المسد)",
+    112: "Al-Ikhlas (الإخلاص)",
+    113: "Al-Falaq (الفلق)",
+    114: "An-Nas (الناس)",
+}
+# Pre-load all verses at startup
+all_verses = []
+surahs_dir = "surahs_json_files"
+if not os.path.isdir(surahs_dir):
+    raise FileNotFoundError("Missing 'surahs_json_files/' folder. Upload it from the original repo.")
+for filename in sorted(os.listdir(surahs_dir)):
+    if filename.endswith(".json"):
+        try:
+            surah_number = int(filename.split("_")[0])
+        except:
+            continue
+        surah_name = surah_names.get(surah_number, f"Surah {surah_number}")
+        file_path = os.path.join(surahs_dir, filename)
+        with open(file_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        verses = [ayah["text"] for ayah in data.get("ayahs", []) if "text" in ayah]
+        for ayah_number, verse_text in enumerate(verses, start=1):
+            all_verses.append((surah_number, surah_name, ayah_number, verse_text))
+print(f"Loaded {len(all_verses)} verses from {len(os.listdir(surahs_dir))} surahs.")
+def normalize_text(text: str) -> str:
+    return " ".join(text.strip().split())
+def find_best_verse(transcription: str) -> Dict[str, Any]:
+    transcription_norm = normalize_text(transcription)
+    if not transcription_norm:
+        return {"error": "Empty transcription"}
+    best_ratio = 0.0
+    best_match: Optional[Dict[str, Any]] = None
+    for surah_number, surah_name, ayah_number, verse_text in all_verses:
+        verse_norm = normalize_text(verse_text)
+        ratio = SequenceMatcher(None, transcription_norm, verse_norm).ratio()
+        if ratio > best_ratio:
+            best_ratio = ratio
+            best_match = {
+                "surah_number": surah_number,
+                "surah_name": surah_name,
+                "ayah_number": ayah_number,
+                "verse_text": verse_text,
+                "similarity": round(ratio, 4)
+            }
+    if best_match and best_ratio >= 0.75:  # Adjustable threshold
+        return best_match
+    else:
+        return {
+            "error": "No confident match found",
+            "best_similarity": round(best_ratio, 4) if best_match else 0.0,
+            "possible_match": best_match
+        }
+@app.get("/")
+def root():
+    return {"message": "Quranic Verse Recognition API running. POST audio to /recognize"}
+@app.post("/recognize")
+async def recognize(file: UploadFile = File(...)):
+    if not file.content_type or not file.content_type.startswith("audio/"):
+        raise HTTPException(status_code=400, detail="File must be an audio file")
+    # Save to temp file (pipeline accepts file path directly)
+    contents = await file.read()
+    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1] or ".wav") as tmp:
+        tmp.write(contents)
+        tmp_path = tmp.name
+    try:
+        transcription = pipe(tmp_path)["text"]
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Transcription error: {str(e)}")
+    finally:
+        os.unlink(tmp_path)
+    result = find_best_verse(transcription)
+    result["transcription"] = transcription
+    return JSONResponse(content=result)