Final_Assignment_Template

Configuration error

App Files Files Community

oremaz commited on Jun 15, 2025

Commit

0007857

1 Parent(s): 5fa5125

Update agent.py

Browse files

Files changed (1) hide show

agent.py +31 -19

agent.py CHANGED Viewed

@@ -120,7 +120,8 @@ def initialize_models(use_api_mode=False):
         print("Initializing models in non-API mode with local models...")
         try :
-            from typing import Any, Optional, List, Mapping
             from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
             from llama_index.core.llms.callbacks import llm_completion_callback
             from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
@@ -128,16 +129,18 @@ def initialize_models(use_api_mode=False):
             import torch
             class QwenVL7BCustomLLM(CustomLLM):
-                context_window: int = 32768
-                num_output: int = 256
-                model_name: str = "Qwen/Qwen2.5-VL-7B-Instruct"
-                _device: str = PrivateAttr()
-                def __init__(self, device: str = "cuda", **kwargs):
-                    self._device = "cuda"
-                    self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
                         self.model_name, torch_dtype=torch.bfloat16, device_map="auto"
                     )
-                    self.processor = AutoProcessor.from_pretrained(self.model_name)
                 @property
                 def metadata(self) -> LLMMetadata:
@@ -148,41 +151,50 @@ def initialize_models(use_api_mode=False):
                     )
                 @llm_completion_callback()
-                def complete(self, prompt: str, image_paths: Optional[List[str]] = None, **kwargs: Any) -> CompletionResponse:
-                    # Prepare messages for multimodal input
                     messages = [{"role": "user", "content": []}]
                     if image_paths:
                         for path in image_paths:
                             messages[0]["content"].append({"type": "image", "image": path})
                     messages[0]["content"].append({"type": "text", "text": prompt})
-                    # Process inputs
-                    text = self.processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
                     image_inputs, video_inputs = process_vision_info(messages)
-                    inputs = self.processor(
                         text=[text],
                         images=image_inputs,
                         videos=video_inputs,
                         padding=True,
                         return_tensors="pt",
                     )
-                    inputs = inputs.to(self.model.device)
                     # Generate output
-                    generated_ids = self.model.generate(**inputs, max_new_tokens=self.num_output)
                     generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
-                    output_text = self.processor.batch_decode(
                         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
                     )[0]
                     return CompletionResponse(text=output_text)
                 @llm_completion_callback()
-                def stream_complete(self, prompt: str, image_paths: Optional[List[str]] = None, **kwargs: Any) -> CompletionResponseGen:
                     response = self.complete(prompt, image_paths)
                     for token in response.text:
                         yield CompletionResponse(text=token, delta=token)
             proj_llm = QwenVL7BCustomLLM()
             # Code LLM

         print("Initializing models in non-API mode with local models...")
         try :
+            from typing import Optional, List, Any
+            from pydantic import Field, PrivateAttr
             from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
             from llama_index.core.llms.callbacks import llm_completion_callback
             from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
             import torch
             class QwenVL7BCustomLLM(CustomLLM):
+                model_name: str = Field(default="Qwen/Qwen2.5-VL-7B-Instruct")
+                context_window: int = Field(default=32768)
+                num_output: int = Field(default=256)
+                _model = PrivateAttr()
+                _processor = PrivateAttr()
+                def __init__(self, **kwargs):
+                    super().__init__(**kwargs)
+                    self._model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
                         self.model_name, torch_dtype=torch.bfloat16, device_map="auto"
                     )
+                    self._processor = AutoProcessor.from_pretrained(self.model_name)
                 @property
                 def metadata(self) -> LLMMetadata:
                     )
                 @llm_completion_callback()
+                def complete(
+                    self,
+                    prompt: str,
+                    image_paths: Optional[List[str]] = None,
+                    **kwargs: Any
+                ) -> CompletionResponse:
+                    # Prepare multimodal input
                     messages = [{"role": "user", "content": []}]
                     if image_paths:
                         for path in image_paths:
                             messages[0]["content"].append({"type": "image", "image": path})
                     messages[0]["content"].append({"type": "text", "text": prompt})
+                    # Tokenize and process
+                    text = self._processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
                     image_inputs, video_inputs = process_vision_info(messages)
+                    inputs = self._processor(
                         text=[text],
                         images=image_inputs,
                         videos=video_inputs,
                         padding=True,
                         return_tensors="pt",
                     )
+                    inputs = inputs.to(self._model.device)
                     # Generate output
+                    generated_ids = self._model.generate(**inputs, max_new_tokens=self.num_output)
                     generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+                    output_text = self._processor.batch_decode(
                         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
                     )[0]
                     return CompletionResponse(text=output_text)
                 @llm_completion_callback()
+                def stream_complete(
+                    self,
+                    prompt: str,
+                    image_paths: Optional[List[str]] = None,
+                    **kwargs: Any
+                ) -> CompletionResponseGen:
                     response = self.complete(prompt, image_paths)
                     for token in response.text:
                         yield CompletionResponse(text=token, delta=token)
             proj_llm = QwenVL7BCustomLLM()
             # Code LLM