Upload app.py
app.py
CHANGED
@@ -1,25 +1,21 @@
-from fastapi import FastAPI, 
+from fastapi import FastAPI, Request
 from transformers import pipeline
-from PIL import Image
 import torch
-import io
 import uvicorn
 import os
 
-app = FastAPI(title="
+app = FastAPI(title="Qwen 0.5B AI Chat API")
 
-# Check
-# Spaces usually provide GPUs, if not it will fallback to CPU (will be slow)
+# Check for GPU (even though free Space uses CPU)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
 
-print(f"Loading
+print(f"Loading Qwen 2.5 0.5B model on {device}...")
 
 try:
-    # Use the pipeline API for the easiest implementation of the 9GB model
     pipe = pipeline(
-        "
-        model="
+        "text-generation",
+        model="Qwen/Qwen2.5-0.5B-Instruct",
         torch_dtype=dtype,
         device_map="auto",
     )
@@ -31,55 +27,59 @@ except Exception as e:
 @app.get("/")
 def read_root():
     return {
-        "status": "
-        "
-        "
+        "status": "Chat API is active",
+        "model": "Qwen 0.5B",
+        "device": device
     }
 
 @app.post("/analyze")
-async def 
-    prompt: str = Form("Describe this medical image and give a preliminary analysis."),
-    file: UploadFile = File(None)
-):
+async def chat_endpoint(request: Request):
     if pipe is None:
         return {"error": "Model not loaded properly. Check logs."}
+
+    prompt = ""
+    try:
+        # Primary: Accept JSON payload
+        data = await request.json()
+        prompt = data.get("prompt", "")
+    except Exception:
+        # Fallback: Trying to read form data just in case
+        try:
+            form = await request.form()
+            prompt = form.get("prompt", "")
+        except:
+            pass
+
+    if not prompt:
+        return {"error": "لا يوجد نص في الرسالة."}
 
-    # 
-    system_prompt = "أنت
+    # Set the personality and language for the model
+    system_prompt = "أنت مساعد ذكاء اصطناعي طبيب وودود. أجب باللغة العربية بوضوح وإيجاز."
 
     messages = [
-        {
-
-            "content": [{"type": "text", "text": system_prompt}]
-        }
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": prompt}
     ]
-
-    # Handle image if provided
-    user_content = [{"type": "text", "text": prompt}]
-    if file is not None and file.filename != "":
-        try:
-            contents = await file.read()
-            image = Image.open(io.BytesIO(contents)).convert("RGB")
-            user_content.append({"type": "image", "image": image})
-        except Exception as e:
-            return {"error": f"Failed to process image: {str(e)}"}
-
-    messages.append({
-        "role": "user",
-        "content": user_content
-    })
 
-
-
-
-
-
-
-
-
-
-
+    try:
+        # Run inference using the chat template directly
+        output = pipe(
+            messages,
+            max_new_tokens=400,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.9
+        )
+
+        # The output includes the system, user, and assistant messages. We take the last one.
+        result = output[0]["generated_text"][-1]["content"]
+
+        return {
+            "analysis": result,
+            "success": True
+        }
+    except Exception as e:
+        return {"error": f"Failed to generate response: {str(e)}"}
 
 if __name__ == "__main__":
-    # Port 7860 is the default for Hugging Face Spaces
     uvicorn.run(app, host="0.0.0.0", port=7860)
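With this commit, /analyze no longer takes an uploaded image; it expects a JSON body with a single "prompt" field and falls back to form data if JSON parsing fails. The Arabic system prompt roughly says "You are a friendly, doctor-like AI assistant. Answer in Arabic clearly and concisely," and the empty-prompt error means "There is no text in the message." A minimal client sketch follows; the base URL is a placeholder for the deployed Space and the sample question is illustrative only:

import requests

BASE_URL = "http://localhost:7860"  # placeholder; replace with the Space's actual URL

# Primary path: JSON body, handled by the request.json() branch in /analyze.
resp = requests.post(f"{BASE_URL}/analyze", json={"prompt": "ما هي أعراض فقر الدم؟"})  # "What are the symptoms of anemia?"
print(resp.json())  # on success: {"analysis": "...", "success": True}; otherwise an "error" key

# Fallback path: form-encoded body, handled by the request.form() branch.
resp = requests.post(f"{BASE_URL}/analyze", data={"prompt": "ما هي أعراض فقر الدم؟"})
print(resp.json())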
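On the parsing side, output[0]["generated_text"][-1]["content"] assumes the behavior of recent transformers text-generation pipelines when the input is a list of chat messages: generated_text comes back as the full conversation, with the newly generated assistant turn appended last. Roughly (shape only, values illustrative):

# Approximate shape of `output` when pipe() is called with chat messages:
# [
#   {
#     "generated_text": [
#       {"role": "system", "content": "أنت مساعد ..."},
#       {"role": "user", "content": "..."},
#       {"role": "assistant", "content": "<generated reply>"},
#     ]
#   }
# ]
# Hence [-1]["content"] picks out the assistant's reply.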