Spaces:
Configuration error
Configuration error
oremaz
committed on
Commit
·
0007857
1
Parent(s):
5fa5125
Update agent.py
Browse files
agent.py
CHANGED
|
@@ -120,7 +120,8 @@ def initialize_models(use_api_mode=False):
|
|
| 120 |
print("Initializing models in non-API mode with local models...")
|
| 121 |
|
| 122 |
try :
|
| 123 |
-
from typing import
|
|
|
|
| 124 |
from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
|
| 125 |
from llama_index.core.llms.callbacks import llm_completion_callback
|
| 126 |
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
|
|
@@ -128,16 +129,18 @@ def initialize_models(use_api_mode=False):
|
|
| 128 |
import torch
|
| 129 |
|
| 130 |
class QwenVL7BCustomLLM(CustomLLM):
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
|
|
|
|
|
|
| 138 |
self.model_name, torch_dtype=torch.bfloat16, device_map="auto"
|
| 139 |
)
|
| 140 |
-
self.
|
| 141 |
|
| 142 |
@property
|
| 143 |
def metadata(self) -> LLMMetadata:
|
|
@@ -148,41 +151,50 @@ def initialize_models(use_api_mode=False):
|
|
| 148 |
)
|
| 149 |
|
| 150 |
@llm_completion_callback()
|
| 151 |
-
def complete(
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
messages = [{"role": "user", "content": []}]
|
| 154 |
if image_paths:
|
| 155 |
for path in image_paths:
|
| 156 |
messages[0]["content"].append({"type": "image", "image": path})
|
| 157 |
messages[0]["content"].append({"type": "text", "text": prompt})
|
| 158 |
|
| 159 |
-
#
|
| 160 |
-
text = self.
|
| 161 |
image_inputs, video_inputs = process_vision_info(messages)
|
| 162 |
-
inputs = self.
|
| 163 |
text=[text],
|
| 164 |
images=image_inputs,
|
| 165 |
videos=video_inputs,
|
| 166 |
padding=True,
|
| 167 |
return_tensors="pt",
|
| 168 |
)
|
| 169 |
-
inputs = inputs.to(self.
|
| 170 |
|
| 171 |
# Generate output
|
| 172 |
-
generated_ids = self.
|
| 173 |
generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
|
| 174 |
-
output_text = self.
|
| 175 |
generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
|
| 176 |
)[0]
|
| 177 |
return CompletionResponse(text=output_text)
|
| 178 |
|
| 179 |
@llm_completion_callback()
|
| 180 |
-
def stream_complete(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
response = self.complete(prompt, image_paths)
|
| 182 |
for token in response.text:
|
| 183 |
yield CompletionResponse(text=token, delta=token)
|
| 184 |
|
| 185 |
-
|
| 186 |
proj_llm = QwenVL7BCustomLLM()
|
| 187 |
|
| 188 |
# Code LLM
|
|
|
|
| 120 |
print("Initializing models in non-API mode with local models...")
|
| 121 |
|
| 122 |
try :
|
| 123 |
+
from typing import Optional, List, Any
|
| 124 |
+
from pydantic import Field, PrivateAttr
|
| 125 |
from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
|
| 126 |
from llama_index.core.llms.callbacks import llm_completion_callback
|
| 127 |
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
|
|
|
|
| 129 |
import torch
|
| 130 |
|
| 131 |
class QwenVL7BCustomLLM(CustomLLM):
|
| 132 |
+
model_name: str = Field(default="Qwen/Qwen2.5-VL-7B-Instruct")
|
| 133 |
+
context_window: int = Field(default=32768)
|
| 134 |
+
num_output: int = Field(default=256)
|
| 135 |
+
_model = PrivateAttr()
|
| 136 |
+
_processor = PrivateAttr()
|
| 137 |
+
|
| 138 |
+
def __init__(self, **kwargs):
    """Run the parent constructor, then load the model and its processor.

    All keyword arguments are handed unchanged to the parent constructor.
    Afterwards the checkpoint named by ``self.model_name`` is loaded twice:
    once as the generation model (bfloat16, ``device_map="auto"``) and once
    as the matching processor. Both are stored on the private attributes
    ``_model`` and ``_processor``.
    """
    super().__init__(**kwargs)

    checkpoint = self.model_name
    self._model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        checkpoint,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    self._processor = AutoProcessor.from_pretrained(checkpoint)
|
| 144 |
|
| 145 |
@property
|
| 146 |
def metadata(self) -> LLMMetadata:
|
|
|
|
| 151 |
)
|
| 152 |
|
| 153 |
@llm_completion_callback()
def complete(
    self,
    prompt: str,
    image_paths: Optional[List[str]] = None,
    **kwargs: Any
) -> CompletionResponse:
    """Generate one completion for a text prompt with optional images.

    Args:
        prompt: Text placed last in the single user message.
        image_paths: Optional image references; each becomes an
            ``{"type": "image"}`` entry placed before the text entry.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A ``CompletionResponse`` whose ``text`` is the decoded continuation
        of the first (only) sequence in the batch, with prompt tokens and
        special tokens removed.
    """
    # Build the single user turn: image entries first, then the text prompt.
    content = [{"type": "image", "image": path} for path in (image_paths or [])]
    content.append({"type": "text", "text": prompt})
    messages = [{"role": "user", "content": content}]

    # Render the chat template to a string; tokenization happens in the
    # processor call below together with the vision inputs.
    chat_text = self._processor.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # NOTE(review): process_vision_info is defined outside this view;
    # presumably it resolves the image/video entries in `messages`.
    image_inputs, video_inputs = process_vision_info(messages)
    batch = self._processor(
        text=[chat_text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = batch.to(self._model.device)

    # Generation is capped at `num_output` new tokens.
    generated_ids = self._model.generate(**inputs, max_new_tokens=self.num_output)

    # Each output row starts with its prompt tokens; keep only what follows.
    continuation_ids = []
    for prompt_ids, full_ids in zip(inputs.input_ids, generated_ids):
        continuation_ids.append(full_ids[len(prompt_ids):])

    decoded = self._processor.batch_decode(
        continuation_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return CompletionResponse(text=decoded[0])
|
| 186 |
|
| 187 |
@llm_completion_callback()
def stream_complete(
    self,
    prompt: str,
    image_paths: Optional[List[str]] = None,
    **kwargs: Any
) -> CompletionResponseGen:
    """Yield a completion piece by piece after generating it in full.

    This is not incremental decoding: ``complete`` runs to the end first,
    and the finished text is then replayed one character at a time.

    Args:
        prompt: Text prompt forwarded to ``complete``.
        image_paths: Optional image references forwarded to ``complete``.
        **kwargs: Accepted for interface compatibility; not used here.

    Yields:
        ``CompletionResponse`` objects where ``delta`` is the newly emitted
        character and ``text`` is everything emitted so far, so the last
        chunk's ``text`` equals the full completion.
    """
    response = self.complete(prompt, image_paths)

    # `text` must carry the running total, not just the latest piece;
    # consumers that read only the final chunk would otherwise see a
    # single character instead of the whole answer.
    emitted = ""
    for piece in response.text:
        emitted += piece
        yield CompletionResponse(text=emitted, delta=piece)
|
| 197 |
|
|
|
|
| 198 |
proj_llm = QwenVL7BCustomLLM()
|
| 199 |
|
| 200 |
# Code LLM
|