marveljo committed
Commit 7d0535a · verified · 1 Parent(s): 03585f1

Update app.py

Files changed (1): app.py +19 -61
app.py CHANGED
@@ -1,70 +1,28 @@
+from fastapi import FastAPI, UploadFile
+from transformers import AutoProcessor, AutoModelForImageTextToText
 import torch
-from transformers import AutoProcessor, AutoModelForVision2Seq
-from fastapi import FastAPI, File, UploadFile, Form
-from fastapi.responses import JSONResponse
-from io import BytesIO
 from PIL import Image
+import io
 
-model_id = "HPAI-BSC/Aloe-Vision-7B-AR"
+app = FastAPI()
 
+model_id = "HPAI-BSC/Aloe-Vision-7B-AR"
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-model = AutoModelForVision2Seq.from_pretrained(
+model = AutoModelForImageTextToText.from_pretrained(
     model_id,
-    torch_dtype=torch.bfloat16,
+    dtype=torch.bfloat16,
     device_map="auto",
-    trust_remote_code=True,
+    trust_remote_code=True
 )
 
-app = FastAPI(title="Aloe Vision 7B AR API")
-
-@app.post("/predict")
-async def predict(
-    file: UploadFile = File(None),
-    question: str = Form(None)
-):
-    try:
-        # --- Case 1: both image and text ---
-        if file and question:
-            image = Image.open(BytesIO(await file.read())).convert("RGB")
-            messages = [
-                {"role": "user", "content": [
-                    {"type": "image", "image": image},
-                    {"type": "text", "text": question}
-                ]}
-            ]
-
-        # --- Case 2: text only ---
-        elif question and not file:
-            messages = [{"role": "user", "content": [{"type": "text", "text": question}]}]
-
-        # --- Case 3: image only ---
-        elif file and not question:
-            image = Image.open(BytesIO(await file.read())).convert("RGB")
-            messages = [
-                {"role": "user", "content": [
-                    {"type": "image", "image": image},
-                    {"type": "text", "text": "Describe this image briefly."}
-                ]}
-            ]
-        else:
-            return JSONResponse({"error": "You must provide an image, text, or both."}, status_code=400)
-
-        # --- Process ---
-        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        image_inputs = processor.process_vision_info(messages)
-        inputs = processor(text=[text], **image_inputs, return_tensors="pt").to(model.device)
-
-        generated = model.generate(
-            **inputs,
-            max_new_tokens=256,
-            do_sample=False,
-            eos_token_id=processor.tokenizer.eos_token_id,
-        )
-
-        output_text = processor.batch_decode(generated, skip_special_tokens=True)[0]
-        answer = output_text.split(text)[-1].strip()
-
-        return JSONResponse({"answer": answer})
-
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
+@app.post("/analyze")
+async def analyze(file: UploadFile):
+    image = Image.open(io.BytesIO(await file.read())).convert("RGB")
+    messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": "Describe this image"}]}]
+    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    image_inputs = processor.process_vision_info(messages)
+    inputs = processor(text=[text], **image_inputs, return_tensors="pt").to(model.device)
+
+    generated = model.generate(**inputs, max_new_tokens=256)
+    output_text = processor.batch_decode(generated, skip_special_tokens=True)[0]
+    return {"result": output_text}
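For reference, a minimal client sketch against the new /analyze route. The multipart field name "file" and the "result" response key come from the handler above; the host, port, and image path are illustrative assumptions, not part of this commit.

import requests

# Hypothetical client for the /analyze endpoint defined in app.py.
# http://localhost:8000 and sample.jpg are placeholder assumptions.
with open("sample.jpg", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/analyze",
        # FastAPI binds this multipart part to the UploadFile parameter named "file".
        files={"file": ("sample.jpg", f, "image/jpeg")},
    )
resp.raise_for_status()
# The handler decodes the full generated sequence, so the returned text
# may still include the chat-template prompt.
print(resp.json()["result"])

To serve the app for such a request, something like "uvicorn app:app --port 8000" works, assuming uvicorn is installed alongside fastapi.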