Hassan73 committed on
Commit
85802db
·
verified ·
1 Parent(s): 486bedb

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -49
app.py CHANGED
@@ -1,25 +1,21 @@
1
- from fastapi import FastAPI, UploadFile, File, Form
2
  from transformers import pipeline
3
- from PIL import Image
4
  import torch
5
- import io
6
  import uvicorn
7
  import os
8
 
9
- app = FastAPI(title="MedGemma 4B Internal API")
10
 
11
- # Check if we are running on Hugging Face Spaces
12
- # Spaces usually provide GPUs, if not it will fallback to CPU (will be slow)
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
14
  dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
15
 
16
- print(f"Loading full 9GB MedGemma model on {device}...")
17
 
18
  try:
19
- # Use the pipeline API for the easiest implementation of the 9GB model
20
  pipe = pipeline(
21
- "image-text-to-text",
22
- model="google/medgemma-4b-it",
23
  torch_dtype=dtype,
24
  device_map="auto",
25
  )
@@ -31,55 +27,59 @@ except Exception as e:
31
  @app.get("/")
32
  def read_root():
33
  return {
34
- "status": "MedGemma 4B API is active",
35
- "device": device,
36
- "model_size": "Full 9GB"
37
  }
38
 
39
  @app.post("/analyze")
40
- async def analyze_image(
41
- prompt: str = Form("Describe this medical image and give a preliminary analysis."),
42
- file: UploadFile = File(None)
43
- ):
44
  if pipe is None:
45
  return {"error": "Model not loaded properly. Check logs."}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- # Format messages for MedGemma with Arabic instructions
48
- system_prompt = "أنت خبير طبي ومستشار رقمي. يجب أن تكون إجابتك باللغة العربية بشكل أساسي. إذا وجدت مصطلحات طبية معقدة أو كلمات ليس لها ترجمة شائعة، فاذكرها بالإنجليزية بين أقواس. قدم إجابة علمية دقيقة بناءً على المعطيات."
49
 
50
  messages = [
51
- {
52
- "role": "system",
53
- "content": [{"type": "text", "text": system_prompt}]
54
- }
55
  ]
56
-
57
- # Handle image if provided
58
- user_content = [{"type": "text", "text": prompt}]
59
- if file is not None and file.filename != "":
60
- try:
61
- contents = await file.read()
62
- image = Image.open(io.BytesIO(contents)).convert("RGB")
63
- user_content.append({"type": "image", "image": image})
64
- except Exception as e:
65
- return {"error": f"Failed to process image: {str(e)}"}
66
-
67
- messages.append({
68
- "role": "user",
69
- "content": user_content
70
- })
71
 
72
- # Inference
73
- output = pipe(text=messages, max_new_tokens=250)
74
-
75
- # Extract the response text
76
- result = output[0]["generated_text"][-1]["content"]
77
-
78
- return {
79
- "analysis": result,
80
- "success": True
81
- }
 
 
 
 
 
 
 
 
 
82
 
83
  if __name__ == "__main__":
84
- # Port 7860 is the default for Hugging Face Spaces
85
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
from fastapi import FastAPI, Request
from transformers import pipeline
import torch
import uvicorn
import os

# FastAPI application serving a single chat endpoint backed by Qwen 0.5B.
app = FastAPI(title="Qwen 0.5B AI Chat API")

# Probe the hardware once and reuse the answer: free Spaces run on CPU,
# paid tiers may expose a GPU (bfloat16 is only worthwhile on CUDA).
_has_cuda = torch.cuda.is_available()
device = "cuda" if _has_cuda else "cpu"
dtype = torch.bfloat16 if _has_cuda else torch.float32

print(f"Loading Qwen 2.5 0.5B model on {device}...")
14
 
15
  try:
 
16
  pipe = pipeline(
17
+ "text-generation",
18
+ model="Qwen/Qwen2.5-0.5B-Instruct",
19
  torch_dtype=dtype,
20
  device_map="auto",
21
  )
 
27
@app.get("/")
def read_root():
    """Health-check endpoint: report service status, model name, and device."""
    info = {}
    info["status"] = "Chat API is active"
    info["model"] = "Qwen 0.5B"
    info["device"] = device
    return info
34
 
35
@app.post("/analyze")
async def chat_endpoint(request: Request):
    """Chat endpoint: extract a "prompt" from the request body (JSON first,
    form data as a fallback) and return the model's Arabic-language reply.

    Returns a dict with either {"analysis": <text>, "success": True} or
    {"error": <message>} — failures are reported in-band, never raised.
    """
    if pipe is None:
        return {"error": "Model not loaded properly. Check logs."}

    prompt = ""
    try:
        # Primary path: a JSON payload like {"prompt": "..."}.
        data = await request.json()
        # Guard: a valid-JSON body can still be a list/string/number,
        # where .get would raise AttributeError.
        if isinstance(data, dict):
            prompt = data.get("prompt", "")
    except Exception:
        # Fallback: some clients send form data instead of JSON.
        try:
            form = await request.form()
            prompt = form.get("prompt", "")
        except Exception:
            # Deliberate best-effort: an unreadable body is handled by the
            # empty-prompt check below. (Was a bare `except:`, which would
            # also swallow KeyboardInterrupt/SystemExit.)
            pass

    if not prompt:
        return {"error": "لا يوجد نص في الرسالة."}

    # System prompt pins the assistant persona and forces concise Arabic.
    system_prompt = "أنت مساعد ذكاء اصطناعي طبيب وودود. أجب باللغة العربية بوضوح وإيجاز."

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    try:
        # The pipeline applies the model's chat template to the message list.
        output = pipe(
            messages,
            max_new_tokens=400,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
        # generated_text echoes the system and user messages followed by the
        # assistant turn; the last entry is the newly generated reply.
        result = output[0]["generated_text"][-1]["content"]
        return {
            "analysis": result,
            "success": True,
        }
    except Exception as e:
        return {"error": f"Failed to generate response: {str(e)}"}
83
 
84
if __name__ == "__main__":
    # Hugging Face Spaces routes traffic to port 7860 by default.
    uvicorn.run(app, host="0.0.0.0", port=7860)