vineeths commited on
Commit
da1cdd1
Β·
verified Β·
1 Parent(s): fed8eb3

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +13 -0
  2. main.py +126 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Python 3.12 base image for the FastAPI inference service.
FROM python:3.12

WORKDIR /deploy

# Install dependencies first so this layer is cached across code-only changes.
COPY ./requirements.txt /deploy/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /deploy/requirements.txt

# Make /deploy importable by the application.
ENV PYTHONPATH=/deploy

COPY ./main.py /deploy/main.py

# Serve the app with the FastAPI CLI on port 8000.
CMD ["fastapi", "run", "main.py", "--port", "8000"]
main.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import tempfile

from fastapi import FastAPI, UploadFile, File
from fastapi.responses import HTMLResponse
from transformers import (
    AutoProcessor,
    AutoTokenizer,
    GenerationConfig,
    Qwen2_5_VLForConditionalGeneration,
)
from qwen_vl_utils import process_vision_info

# Model identifier on the Hugging Face Hub (fine-tuned Qwen2.5-VL).
MODEL_PATH = "Ananthu01/qwen2.5_vl_finetuned_model"
# MODEL_PATH = "Qwen/Qwen2.5-VL-3B-Instruct"

# Load the vision-language model on CPU.
# FIX: the previous `wbits=4` / `groupsize=128` kwargs are GPTQ-loader
# options, not `from_pretrained` parameters for a transformers model class;
# they do not quantize anything and are merely forwarded as stray kwargs.
# If 4-bit loading is actually required, pass a `quantization_config`
# (e.g. BitsAndBytesConfig or GPTQConfig) instead.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_PATH,
    trust_remote_code=True,
    device_map="cpu",  # ensure the model is loaded on CPU
    use_safetensors=True,
)

# Processor handles both the chat template and image preprocessing.
processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)

# Tokenizer kept for text-only tokenization/decoding needs.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)

# Decoding parameters shared by all requests.
generation_config = GenerationConfig(
    temperature=0.1,          # low temperature -> near-deterministic output
    top_p=0.8,                # nucleus sampling probability
    repetition_penalty=1.05,  # discourage repetitive output
    max_new_tokens=1024,      # cap on generated tokens
)

# Create the FastAPI app instance.
app = FastAPI()
@app.get("/", response_class=HTMLResponse)
async def main():
    """Serve a minimal HTML upload form.

    The form posts the selected image to ``/generate`` as multipart form
    data under the field name ``image_file``.
    """
    return """
    <html>
        <head>
            <title>Qwen2.5 VL Image Upload</title>
        </head>
        <body>
            <h2>Upload an Image</h2>
            <form action="/generate" enctype="multipart/form-data" method="post">
                <input name="image_file" type="file" accept="image/*">
                <input type="submit" value="Submit">
            </form>
        </body>
    </html>
    """
@app.post("/generate")
async def generate_output(image_file: UploadFile = File(...)):
    """Run the model on an uploaded image with the fixed prompt "Extract JSON".

    - **image_file**: the image file uploaded by the user (multipart form data).

    Returns the newly generated text (prompt tokens stripped).
    """
    # Read the uploaded image bytes.
    image_bytes = await image_file.read()

    # Persist the upload to disk: qwen_vl_utils resolves images by path/URI.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
        tmp.write(image_bytes)
        tmp_path = tmp.name

    try:
        # Chat-style message carrying the image plus the hardcoded instruction.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": tmp_path},
                    {"type": "text", "text": "Extract JSON"},
                ],
            }
        ]

        # Render the chat template to a prompt string.
        prompt = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Resolve the image/video inputs referenced by the messages.
        image_inputs, video_inputs = process_vision_info(messages)

        # FIX: the previous code tokenized the prompt with the text-only
        # tokenizer and passed the images via a `multi_modal_data=` kwarg,
        # which transformers' `generate` does not accept (that is vLLM's
        # API) -- so the image never reached the model. The processor builds
        # the joint text+vision tensors Qwen2.5-VL expects.
        inputs = processor(
            text=[prompt],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        )
        inputs = inputs.to("cpu")

        # Generate output from the model.
        generated_ids = model.generate(
            **inputs,
            generation_config=generation_config,
        )

        # Strip the prompt tokens so only the newly generated text is returned.
        trimmed_ids = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        generated_text = processor.batch_decode(
            trimmed_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]
    finally:
        # Always remove the temp image, even if generation raises.
        os.remove(tmp_path)

    return generated_text
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
fastapi
vllm
git+https://github.com/huggingface/transformers
accelerate
qwen-vl-utils