Update app.py
app.py CHANGED
@@ -1,71 +1,104 @@
 import os
+import logging
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 from llama_cpp import Llama
 
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Set up cache directory
 CACHE_DIR = os.environ.get("HF_HOME", "/app/.cache/huggingface")
 
+# Ensure libraries use the cache directory
 os.environ["HF_HOME"] = CACHE_DIR
 
+# Create the FastAPI app
 app = FastAPI(
     title="MGZON Smart Assistant",
+    description="Combines the fine-tuned T5 model with Mistral-7B (GGUF) inside the Space"
 )
 
+# Health check endpoint
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy"}
+
+# Load T5 model from the Hub
 T5_REPO = "MGZON/mgzon-flan-t5-base"
 try:
+    logger.info(f"Loading tokenizer for {T5_REPO} with HF_TOKEN")
+    t5_tokenizer = AutoTokenizer.from_pretrained(
+        T5_REPO,
+        cache_dir=CACHE_DIR,
+        token=os.environ.get("HF_TOKEN")  # `token` replaces the deprecated `use_auth_token`
+    )
+    logger.info(f"Successfully loaded tokenizer for {T5_REPO}")
+    logger.info(f"Loading model for {T5_REPO}")
+    t5_model = AutoModelForSeq2SeqLM.from_pretrained(
+        T5_REPO,
+        cache_dir=CACHE_DIR,
+        token=os.environ.get("HF_TOKEN")
+    )
+    logger.info(f"Successfully loaded model for {T5_REPO}")
 except Exception as e:
+    logger.error(f"Failed to load T5 model from {T5_REPO}: {str(e)}")
+    raise RuntimeError(f"Failed to load T5 model from {T5_REPO}: {str(e)}")
 
+# Load Mistral GGUF model
 gguf_path = os.path.abspath("models/mistral-7b-instruct-v0.1.Q4_K_M.gguf")
 if not os.path.exists(gguf_path):
+    logger.error(f"Mistral GGUF file not found at {gguf_path}")
     raise RuntimeError(
+        f"Mistral GGUF file not found at {gguf_path}. "
         "Make sure the setup.sh script was executed during the build."
     )
 
 try:
+    logger.info(f"Loading Mistral model from {gguf_path}")
     mistral = Llama(
         model_path=gguf_path,
         n_ctx=2048,
         n_threads=8,
         # If you have a GPU, you can add: n_gpu_layers=35
     )
+    logger.info(f"Successfully loaded Mistral model from {gguf_path}")
 except Exception as e:
+    logger.error(f"Failed to load Mistral model from {gguf_path}: {str(e)}")
+    raise RuntimeError(f"Failed to load Mistral model from {gguf_path}: {str(e)}")
 
+# Define request schema
 class AskRequest(BaseModel):
     question: str
     max_new_tokens: int = 150
 
+# Endpoint: /ask
 @app.post("/ask")
 def ask(req: AskRequest):
+    logger.info(f"Received question: {req.question}")
     q = req.question.strip()
     if not q:
+        logger.error("Empty question received")
         raise HTTPException(status_code=400, detail="Empty question")
 
     try:
         if any(tok in q.lower() for tok in ["mgzon", "flan", "t5"]):
             # T5 model
+            logger.info("Using MGZON-FLAN-T5 model")
             inputs = t5_tokenizer(q, return_tensors="pt", truncation=True, max_length=256)
             out_ids = t5_model.generate(**inputs, max_new_tokens=req.max_new_tokens)  # cap newly generated tokens, not total length
             answer = t5_tokenizer.decode(out_ids[0], skip_special_tokens=True)
             model_name = "MGZON-FLAN-T5"
         else:
             # Mistral model
+            logger.info("Using Mistral-7B-GGUF model")
             out = mistral(prompt=q, max_tokens=req.max_new_tokens)
             answer = out["choices"][0]["text"].strip()
             model_name = "Mistral-7B-GGUF"
+        logger.info(f"Response generated by {model_name}: {answer}")
         return {"model": model_name, "response": answer}
     except Exception as e:
+        logger.error(f"Error processing request: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Error while processing the request: {str(e)}")
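
For reference, a minimal client sketch exercising the two endpoints. The base URL is an assumption (a local uvicorn run on port 8000); substitute the Space's public URL when deployed:

import requests

BASE_URL = "http://localhost:8000"  # assumption: local uvicorn; replace with the Space URL

# Verify the server is up via the new health check endpoint
print(requests.get(f"{BASE_URL}/health").json())  # {'status': 'healthy'}

# Questions containing "mgzon", "flan", or "t5" are routed to the fine-tuned T5 model
r = requests.post(f"{BASE_URL}/ask", json={"question": "What is MGZON?", "max_new_tokens": 100})
print(r.json())  # {'model': 'MGZON-FLAN-T5', 'response': '...'}

# Everything else falls through to the Mistral-7B GGUF model
r = requests.post(f"{BASE_URL}/ask", json={"question": "Summarize what a transformer is."})
print(r.json())  # {'model': 'Mistral-7B-GGUF', 'response': '...'}

Note that the routing in /ask is a plain substring check on the lowercased question, so any question that merely mentions "t5" will hit the fine-tuned model; an explicit model field on AskRequest would make the choice predictable.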