from fastapi import FastAPI, UploadFile, Form
from fastapi.responses import JSONResponse
from transformers import AutoProcessor, AutoModelForVision2Seq
import torch
from PIL import Image
import io

# FastAPI application instance; route handlers below attach to it.
app = FastAPI(title="Aloe Vision Backend")

# Model loading happens at import time, so server startup blocks until the
# weights are downloaded/loaded. Progress messages go to stdout.
print("🚀 Loading model, please wait...")

# Hugging Face hub identifier of the vision-language model to serve.
model_id = "HPAI-BSC/Aloe-Vision-7B-AR"

# trust_remote_code=True executes model-author code from the hub — acceptable
# only because the model source is explicitly chosen above.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForVision2Seq.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,   # halves memory vs fp32; requires bf16-capable hardware
    device_map="auto",            # let accelerate place weights on available devices
    trust_remote_code=True,
)

print("✅ Model loaded!")


@app.post("/analyze")
async def analyze(file: UploadFile, prompt: str = Form("Describe the image")):
    """Run the vision-language model on an uploaded image with a text prompt.

    Parameters
    ----------
    file : UploadFile
        The uploaded image (any format PIL can open; converted to RGB).
    prompt : str
        Instruction for the model; defaults to "Describe the image".

    Returns
    -------
    dict with key ``result`` holding the decoded model output, or a JSON
    500 response ``{"error": ...}`` on any failure.
    """
    try:
        image_bytes = await file.read()
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": prompt}
                ]
            }
        ]

        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # FIX: the original splatted processor.process_vision_info(messages)
        # with ** into the processor call, but in the Qwen-VL convention that
        # helper returns a (images, videos) sequence, not a mapping, so the
        # ** unpack raises TypeError on every request. Hand the PIL image to
        # the processor directly instead.
        # NOTE(review): exact kwarg names of a trust_remote_code processor
        # vary by model — confirm `images=` against this model's card.
        inputs = processor(
            text=[text],
            images=[image],
            return_tensors="pt",
        ).to(model.device)

        # inference_mode: no autograd bookkeeping during generation — less
        # memory, same output (greedy decoding, do_sample=False).
        with torch.inference_mode():
            generated = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=False,
                eos_token_id=processor.tokenizer.eos_token_id,
            )

        # FIX: generate() returns prompt + completion token ids; the original
        # decoded all of them, echoing the chat template back to the client.
        # Slice off the prompt length so only new tokens are decoded.
        new_tokens = generated[:, inputs["input_ids"].shape[1]:]
        output_text = processor.batch_decode(new_tokens, skip_special_tokens=True)[0]
        return {"result": output_text}

    except Exception as e:
        # API boundary: surface any failure as a structured JSON 500 rather
        # than letting the exception escape the handler.
        return JSONResponse(content={"error": str(e)}, status_code=500)


@app.get("/")
def root():
    """Health check: confirm the backend process is alive."""
    payload = {
        "status": "ok",
        "message": "Aloe Vision Backend running!",
    }
    return payload