marveljo committed
Commit 039ebfd · verified · 1 parent: 616d85c

Update app.py

Files changed (1): app.py (+27 −13)
app.py CHANGED
@@ -5,8 +5,8 @@ from fastapi.responses import JSONResponse
 from io import BytesIO
 from PIL import Image
 
-# --- Load model and processor ---
 model_id = "HPAI-BSC/Aloe-Vision-7B-AR"
+
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 model = AutoModelForVision2Seq.from_pretrained(
     model_id,
@@ -17,25 +17,39 @@ model = AutoModelForVision2Seq.from_pretrained(
 
 app = FastAPI(title="Aloe Vision 7B AR API")
 
-# --- Inference endpoint ---
 @app.post("/predict")
 async def predict(
-    file: UploadFile = File(...),
-    question: str = Form("What do you see?")
+    file: UploadFile = File(None),
+    question: str = Form(None)
 ):
     try:
-        image = Image.open(BytesIO(await file.read())).convert("RGB")
+        # --- Case 1: both image and text ---
+        if file and question:
+            image = Image.open(BytesIO(await file.read())).convert("RGB")
+            messages = [
+                {"role": "user", "content": [
+                    {"type": "image", "image": image},
+                    {"type": "text", "text": question}
+                ]}
+            ]
+
+        # --- Case 2: text only ---
+        elif question and not file:
+            messages = [{"role": "user", "content": [{"type": "text", "text": question}]}]
 
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image", "image": image},
-                    {"type": "text", "text": question},
-                ],
-            }
-        ]
+        # --- Case 3: image only ---
+        elif file and not question:
+            image = Image.open(BytesIO(await file.read())).convert("RGB")
+            messages = [
+                {"role": "user", "content": [
+                    {"type": "image", "image": image},
+                    {"type": "text", "text": "Describe this image briefly."}
+                ]}
+            ]
+        else:
+            return JSONResponse({"error": "You must provide an image, text, or both."}, status_code=400)
 
+        # --- Process ---
         text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        image_inputs = processor.process_vision_info(messages)
         inputs = processor(text=[text], **image_inputs, return_tensors="pt").to(model.device)
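
For reference, a minimal client sketch against the updated /predict endpoint might look like the following. The host/port (7860 is the usual Spaces default) and the local file name scan.png are assumptions; only the endpoint path and the form fields file and question come from the diff above.

import requests

API_URL = "http://localhost:7860/predict"  # assumed host/port, not confirmed by the commit

# Case 1: image + question
with open("scan.png", "rb") as f:  # hypothetical local image
    r = requests.post(API_URL, files={"file": f}, data={"question": "What do you see?"})
print(r.json())

# Case 2: text only
r = requests.post(API_URL, data={"question": "What is shown in a typical chest X-ray?"})
print(r.json())

# Case 3: image only -- the server falls back to "Describe this image briefly."
with open("scan.png", "rb") as f:
    r = requests.post(API_URL, files={"file": f})
print(r.json())

# Neither input: the server now returns an error payload with HTTP 400
r = requests.post(API_URL)
print(r.status_code, r.json())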