Spaces:

Mr-Help
/

Test-gemma-directly

Sleeping

App Files Files Community

Mr-Help committed on Mar 16

Commit

a3bb57e

verified ·

1 Parent(s): fc3dca3

Create app.py

Browse files

Files changed (1) hide show

app.py +194 -0

app.py ADDED Viewed

	@@ -0,0 +1,194 @@

+import os
+import re
+from contextlib import asynccontextmanager
+from typing import List, Optional
+import torch
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from transformers import AutoProcessor, Gemma3ForConditionalGeneration
+# =========================
+# Config
+# =========================
+# Hugging Face model repository to load; override with the MODEL_ID env var.
+MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-3-1b-it")
+# Upper bound on tokens generated per request (MAX_NEW_TOKENS env var).
+MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "12"))
+# The intent labels can be changed without editing the code by setting the
+# INTENTS env var to a comma-separated list, for example:
+#   INTENTS="greeting,pricing,complaint,booking,follow_up,other"
+INTENTS_ENV = os.environ.get(
+    "INTENTS",
+    "same_path,change_path,greeting,pricing,booking,complaint,follow_up,other",
+)
+# Split on commas, trim every label, and drop blank entries.
+ALLOWED_INTENTS = [label for label in map(str.strip, INTENTS_ENV.split(",")) if label]
+# Both are assigned by the lifespan handler at startup and stay None until then.
+processor = None
+model = None
+# =========================
+# Schemas
+# =========================
+class IntentRequest(BaseModel):
+    # Request body accepted by POST /intent.
+    # Text to classify; the /intent route answers 400 when it is blank.
+    message: str
+    # Optional per-request label set; when omitted or empty the route uses
+    # the module-level ALLOWED_INTENTS instead.
+    intents: Optional[List[str]] = None
+    # Optional system prompt; when set, build_prompt uses it in place of the
+    # default classifier instructions.
+    system_prompt: Optional[str] = None
+class IntentResponse(BaseModel):
+    # Response body returned by POST /intent.
+    # Label selected by normalize_intent (falls back to "other").
+    intent: str
+    # Decoded model output before it was normalized to a label.
+    raw_output: str
+    # Model identifier the service was configured with (MODEL_ID).
+    model: str
+# =========================
+# Helpers
+# =========================
+def normalize_intent(text: str, allowed_intents: List[str]) -> str:
+    """Map raw model output onto one of the allowed intent labels.
+
+    Matching is case-insensitive. An output that consists of exactly one
+    label (after removing backticks and surrounding whitespace) wins
+    outright; otherwise the first label, in list order, that appears in the
+    output as a whole token is returned.
+
+    Args:
+        text: Decoded text produced by the model.
+        allowed_intents: Candidate labels. Blank entries are ignored.
+
+    Returns:
+        The matching entry of ``allowed_intents`` exactly as supplied, or the
+        literal ``"other"`` when nothing matches.
+    """
+    # Remove Markdown code fences / backticks the model may wrap the label in.
+    cleaned = text.strip().lower().replace("`", "").strip()
+    # Pair each usable label with its comparison key. Blank labels are
+    # skipped because an empty pattern would match almost any output.
+    candidates = [
+        (intent, intent.strip().lower())
+        for intent in allowed_intents
+        if intent and intent.strip()
+    ]
+    # An exact answer beats substring search, so "follow-up" is not claimed
+    # by a shorter label such as "follow" that happens to be listed first.
+    for intent, key in candidates:
+        if key == cleaned:
+            return intent
+    # The model answered with a sentence: look for a label embedded in it.
+    # The lookarounds act like \b for ordinary labels but also work when a
+    # label starts or ends with a non-word character (e.g. "c++").
+    for intent, key in candidates:
+        if re.search(rf"(?<!\w){re.escape(key)}(?!\w)", cleaned):
+            return intent
+    # Nothing recognizable in the output.
+    return "other"
+def build_prompt(user_message: str, allowed_intents: List[str], custom_system_prompt: Optional[str]) -> List[dict]:
+    """Build the two-message chat (system + user) sent to the model.
+
+    Args:
+        user_message: Text to classify; becomes the user turn verbatim.
+        allowed_intents: Labels listed in the default system prompt.
+        custom_system_prompt: Optional replacement for the default system
+            prompt. It is used as-is, so it must name the allowed labels
+            itself. ``None``, empty, or whitespace-only values select the
+            default prompt.
+
+    Returns:
+        A list with a ``system`` and a ``user`` message, each carrying a
+        single ``{"type": "text", "text": ...}`` content part.
+    """
+    # A whitespace-only custom prompt would leave the model without any
+    # instructions, so treat it the same as "not provided".
+    if custom_system_prompt and custom_system_prompt.strip():
+        system_text = custom_system_prompt
+    else:
+        intent_list = ", ".join(allowed_intents)
+        system_text = (
+            "You are an intent classifier.\n"
+            f"Choose exactly one intent from this list: {intent_list}.\n"
+            "Return only the intent label, with no explanation, no punctuation, and no extra words."
+        )
+    return [
+        {
+            "role": "system",
+            "content": [{"type": "text", "text": system_text}],
+        },
+        {
+            "role": "user",
+            "content": [{"type": "text", "text": user_message}],
+        },
+    ]
+def run_intent_classification(user_message: str, allowed_intents: List[str], custom_system_prompt: Optional[str]) -> tuple[str, str]:
+    """Run one greedy generation and turn the output into an intent label.
+
+    Args:
+        user_message: Text to classify.
+        allowed_intents: Labels the result is normalized against.
+        custom_system_prompt: Optional system prompt forwarded to build_prompt.
+
+    Returns:
+        A ``(intent, raw_output)`` pair: the normalized label and the decoded
+        model text it was derived from.
+
+    Raises:
+        RuntimeError: If the module-level model/processor are still unset.
+    """
+    # The globals are only read here, so no `global` statement is needed.
+    # Fail with a clear message instead of an AttributeError on None.
+    if model is None or processor is None:
+        raise RuntimeError("Model is not loaded; the startup handler has not completed.")
+    messages = build_prompt(user_message, allowed_intents, custom_system_prompt)
+    inputs = processor.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        tokenize=True,
+        return_dict=True,
+        return_tensors="pt",
+    )
+    # Greedy decoding (do_sample=False); temperature/top_p are passed as None
+    # explicitly alongside it.
+    with torch.inference_mode():
+        generation = model.generate(
+            **inputs,
+            max_new_tokens=MAX_NEW_TOKENS,
+            do_sample=False,
+            temperature=None,
+            top_p=None,
+        )
+    # Keep only the tokens that follow the prompt in the first sequence.
+    input_len = inputs["input_ids"].shape[-1]
+    generated_tokens = generation[0][input_len:]
+    decoded = processor.decode(generated_tokens, skip_special_tokens=True).strip()
+    final_intent = normalize_intent(decoded, allowed_intents)
+    return final_intent, decoded
+# =========================
+# Lifespan
+# =========================
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Load the processor and model before serving; log on shutdown.
+
+    The loaded objects are stored in the module-level ``processor`` and
+    ``model`` globals. The ``app`` argument is not used in the body.
+    """
+    # NOTE(review): confirm that the default MODEL_ID checkpoint is meant to
+    # be loaded with Gemma3ForConditionalGeneration / AutoProcessor.
+    global model, processor
+    print(f"[startup] Loading model: {MODEL_ID}")
+    processor = AutoProcessor.from_pretrained(MODEL_ID)
+    loaded = Gemma3ForConditionalGeneration.from_pretrained(MODEL_ID, device_map="cpu")
+    # Publish the model only once it has been switched to eval mode.
+    model = loaded.eval()
+    print("[startup] Model loaded successfully.")
+    yield
+    print("[shutdown] App is shutting down.")
+# Application object; model loading is wired in through the lifespan handler.
+app = FastAPI(lifespan=lifespan, title="Gemma Intent Classifier API", version="1.0.0")
+# =========================
+# Routes
+# =========================
+@app.get("/")
+def root():
+    # Landing endpoint: returns a fixed status payload.
+    return dict(status="ok", message="Gemma Intent Classifier API is running.")
+@app.get("/health")
+def health():
+    # Reports a constant "healthy" status plus the configured model id; it
+    # does not inspect the loaded model.
+    return dict(status="healthy", model=MODEL_ID)
+@app.post("/intent", response_model=IntentResponse)
+def classify_intent(payload: IntentRequest):
+    """Classify a message into one of the allowed intents.
+
+    Labels supplied in the request are used when any usable ones are given;
+    otherwise the server-wide ALLOWED_INTENTS apply.
+
+    Raises:
+        HTTPException: 400 when the message is blank or no labels are
+            available; 500 when classification fails.
+    """
+    message = payload.message.strip() if payload.message else ""
+    if not message:
+        raise HTTPException(status_code=400, detail="message is required")
+    # Clean request-supplied labels the same way the INTENTS env var is
+    # parsed (trim, drop blanks) and remove duplicates while keeping order,
+    # so blank or padded labels never reach the prompt or the matcher.
+    requested = list(dict.fromkeys(
+        label.strip() for label in (payload.intents or []) if label and label.strip()
+    ))
+    allowed_intents = requested or ALLOWED_INTENTS
+    if not allowed_intents:
+        raise HTTPException(status_code=400, detail="No intents provided")
+    # Only the model call is guarded; logging and response building below
+    # cannot meaningfully fail.
+    try:
+        intent, raw_output = run_intent_classification(
+            user_message=message,
+            allowed_intents=allowed_intents,
+            custom_system_prompt=payload.system_prompt,
+        )
+    except Exception as e:
+        print(f"[error] {repr(e)}")
+        raise HTTPException(status_code=500, detail=str(e)) from e
+    print("========== REQUEST ==========")
+    print(f"message: {payload.message}")
+    print(f"allowed_intents: {allowed_intents}")
+    print("========== RESPONSE =========")
+    print(f"raw_output: {raw_output}")
+    print(f"intent: {intent}")
+    print("================================")
+    return IntentResponse(
+        intent=intent,
+        raw_output=raw_output,
+        model=MODEL_ID,
+    )