SamarthPujari committed on
Commit
9c3aa32
·
verified ·
1 Parent(s): edffea1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -9
app.py CHANGED
@@ -127,36 +127,46 @@ def image_generator(prompt: str) -> str:
127
  return f"Image saved at {output_path}"
128
 
129
  # -------------------- Local LLM (Replaces HfApiModel) --------------------
130
- from transformers import pipeline
 
131
 
132
  class LocalModel:
133
  """
134
  Minimal local model interface compatible with smolagents CodeAgent.
135
  """
136
  def __init__(self):
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  self.pipeline = pipeline(
138
  "text-generation",
139
- model="openlm-research/open_llama_3b",
140
- device_map="auto",
141
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
142
  )
143
 
144
  def generate(self, prompt, **kwargs):
145
  """
146
  Generate text from a given prompt.
147
-
148
  Args:
149
  prompt (str): Input prompt for generation.
150
  **kwargs: Additional parameters for the pipeline.
151
-
152
  Returns:
153
  str: Generated text output.
154
  """
155
- result = self.pipeline(prompt, max_new_tokens=500, do_sample=True)
156
  return result[0]['generated_text']
157
 
158
- model = LocalModel()
159
-
160
  # -------------------- Agent Setup --------------------
161
  final_answer = FinalAnswerTool()
162
  search_tool = DuckDuckGoSearchTool()
 
127
  return f"Image saved at {output_path}"
128
 
129
  # -------------------- Local LLM (Replaces HfApiModel) --------------------
130
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
131
+ import torch
132
 
133
class LocalModel:
    """
    Minimal local model interface compatible with smolagents CodeAgent.

    Loads the tokenizer and model explicitly (rather than by name) so the
    slow SentencePiece tokenizer can be forced, avoiding the fast-tokenizer
    conversion error seen with the open_llama checkpoint.
    """

    def __init__(self):
        model_name = "openlm-research/open_llama_3b"

        # use_fast=False avoids the SentencePiece -> fast-tokenizer
        # conversion error for this checkpoint.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

        cuda = torch.cuda.is_available()

        # Load model with appropriate dtype; device_map="auto" (accelerate)
        # only when a GPU is present.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if cuda else torch.float32,
            device_map="auto" if cuda else None,
        )

        # FIX: when the model is loaded with device_map="auto", passing
        # device= to pipeline() raises "The model has been loaded with
        # accelerate and therefore cannot be moved to a specific device".
        # Only pass device on the CPU path, where we placed the model ourselves.
        pipe_kwargs = {} if cuda else {"device": -1}
        self.pipeline = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            **pipe_kwargs,
        )

    def generate(self, prompt, **kwargs):
        """
        Generate text from a given prompt.

        Args:
            prompt (str): Input prompt for generation.
            **kwargs: Additional pipeline parameters; they override the
                defaults below.

        Returns:
            str: Generated text output.
        """
        # FIX: the original passed max_new_tokens/do_sample positionally
        # alongside **kwargs, so a caller supplying either raised
        # "TypeError: got multiple values for keyword argument".
        kwargs.setdefault("max_new_tokens", 500)
        kwargs.setdefault("do_sample", True)
        result = self.pipeline(prompt, **kwargs)
        return result[0]['generated_text']
169
 
 
 
170
  # -------------------- Agent Setup --------------------
171
  final_answer = FinalAnswerTool()
172
  search_tool = DuckDuckGoSearchTool()