reveseforward committed on
Commit
5ea2900
·
1 Parent(s): 239391a
Files changed (3) hide show
  1. app.py +0 -58
  2. handler.py +19 -0
  3. requirements.txt +3 -4
app.py DELETED
@@ -1,58 +0,0 @@
1
- import torch
2
- from transformers import AutoProcessor, AutoModelForVision2Seq
3
- from PIL import Image
4
- import base64
5
- import io
6
-
7
- # Load model & processor once at startup
8
- MODEL_NAME = "unsloth/qwen-2.5-vl-7b-instruct"
9
-
10
- device = "cuda" if torch.cuda.is_available() else "cpu"
11
-
12
- processor = AutoProcessor.from_pretrained(MODEL_NAME)
13
- model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME).to(device)
14
- model.eval()
15
-
16
- def decode_base64_image(base64_str: str) -> Image.Image:
17
- """Decode a base64 string to a PIL image"""
18
- image_bytes = base64.b64decode(base64_str)
19
- return Image.open(io.BytesIO(image_bytes)).convert("RGB")
20
-
21
- def handler(inputs: dict):
22
- """
23
- Expected input:
24
- {
25
- "image": "<base64 string>",
26
- "prompt": "Describe this image"
27
- }
28
- """
29
- try:
30
- # Extract inputs
31
- img_b64 = inputs.get("image")
32
- prompt = inputs.get("prompt", "")
33
-
34
- if not img_b64:
35
- return {"error": "Missing 'image' field"}
36
-
37
- image = decode_base64_image(img_b64)
38
-
39
- # Preprocess
40
- pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
41
- input_ids = processor(prompt, return_tensors="pt").input_ids.to(device)
42
-
43
- # Generate
44
- outputs = model.generate(
45
- input_ids=input_ids,
46
- pixel_values=pixel_values,
47
- max_new_tokens=256,
48
- do_sample=True,
49
- temperature=0.7
50
- )
51
-
52
- # Decode output
53
- text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
54
-
55
- return {"generated_text": text}
56
-
57
- except Exception as e:
58
- return {"error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
handler.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # handler.py
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ from PIL import Image
4
+ import torch
5
+
6
+ MODEL_ID = "unsloth/qwen2.5-vl-7b-instruct"
7
+
8
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
9
+ model = AutoModelForCausalLM.from_pretrained(
10
+ MODEL_ID, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True
11
+ )
12
+
13
+ def infer(request):
14
+ messages = request.get("messages", [])
15
+ images = request.get("images", [])
16
+
17
+ inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
18
+ outputs = model.generate(**inputs, max_new_tokens=512)
19
+ return {"text": tokenizer.decode(outputs[0])}
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
- torch>=2.1.0
2
- transformers>=4.56.2
3
- Pillow
4
- safetensors
5
  accelerate
 
 
1
+ transformers>=4.56.3
2
+ torch>=2.2.0
 
 
3
  accelerate
4
+ pillow