Spaces:
Sleeping
Sleeping
| import os | |
| from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM | |
| import torch # För att kontrollera enheter | |
| # Importera ditt nya sökverktyg | |
| from tools.tavily_search import search_tavily | |
class GaiaAgent:
    """Question-answering agent that drives a causal LM through a tool loop.

    The agent prompts the model with a Thought / Tool / Observation / Answer
    protocol.  When the model emits ``<TOOL_CODE>search_tavily("...")</TOOL_CODE>``
    the search tool is executed and its real output is fed back to the model
    as the observation; when the model emits ``Answer:`` the text after it is
    returned as the final answer.

    Instances are callable: ``agent(question)`` is equivalent to
    ``agent.process_task(question)`` plus two log lines.
    """

    _TOOL_OPEN = "<TOOL_CODE>"
    _TOOL_CLOSE = "</TOOL_CODE>"
    _ANSWER_MARKER = "Answer:"
    _SEARCH_PREFIX = "search_tavily("
    _MAX_ITERATIONS = 3

    # Instruction prompt; ``{task_description}`` is the only placeholder.
    _PROMPT_TEMPLATE = """
You are a helpful and expert AI assistant with access to a search tool.
Your task is to carefully and accurately answer questions by using the search tool when necessary.
Always provide a complete and correct answer based on the information you find.
You must follow a Thought, Tool, Observation, Answer (TTOA) pattern.
**Thought:** First, carefully consider the task. What information do you need to answer the question? Do you need to use a tool?
**Tool:** If you need to search, use the search_tavily tool. The format is: <TOOL_CODE>search_tavily("your search query")</TOOL_CODE>
**Observation:** After a tool call, you will receive an observation (the tool's output). This is factual information.
**Answer:** Once you have gathered all necessary information, provide your final, concise answer directly.
Your available tools:
1. search_tavily(query: str): Searches on Tavily and returns relevant results.
Example Interaction:
Task: What is the capital of France?
Thought: I need to find the capital of France. I should use the search_tavily tool.
Tool: <TOOL_CODE>search_tavily("capital of France")</TOOL_CODE>
Observation: The capital of France is Paris.
Answer: The capital of France is Paris.
Now, let's start.
Task: {task_description}
"""

    def __init__(self, model_id: str = "google/gemma-2b-it"):
        """Load the tokenizer and model and build the text-generation pipeline.

        Args:
            model_id: Hugging Face model identifier to load.

        Raises:
            RuntimeError: If loading the tokenizer, the model, or the pipeline
                fails; the original exception is attached as ``__cause__``.
        """
        hf_token = os.getenv("HF_TOKEN")
        # Tokenizer and model are loaded manually (rather than letting
        # ``pipeline`` do it) to keep control over dtype and placement.
        try:
            print(f"Laddar tokenizer för {model_id}...")
            self.tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)

            print(f"Laddar modell för {model_id}...")
            # Only used for logging; actual placement is decided by device_map.
            device = "cuda" if torch.cuda.is_available() else "cpu"
            print(f"Använder enhet: {device}")
            self.model = AutoModelForCausalLM.from_pretrained(
                model_id,
                torch_dtype=torch.bfloat16,  # bfloat16 to reduce memory use
                device_map="auto",  # let Accelerate place weights on CPU/GPU
                token=hf_token,
            )
            print("Modell laddad framgångsrikt.")

            # No explicit ``device`` argument here: the model has already been
            # placed via ``device_map="auto"``.
            self.text_generator = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
            )
            print("Textgenereringspipeline skapad.")
        except Exception as e:
            print(f"Fel vid initiering av agent: {e}")
            raise RuntimeError(
                f"Fel vid laddning av modell eller tokenizer: {e}"
            ) from e

    def __call__(self, question: str) -> str:
        """Answer ``question`` by delegating to :meth:`process_task`.

        Makes an instance usable as a plain function.  Logs a truncated view
        of the question and of the answer so long texts do not flood the logs.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        result = self.process_task(question)
        print(f"Agent returning answer: {result[:100]}...")
        return result

    @classmethod
    def _extract_search_query(cls, tool_call_str: str) -> str:
        """Return the bare query from a ``search_tavily(...)`` call string.

        Tolerates a missing closing parenthesis, surrounding whitespace, a
        ``query=`` keyword, and single or double quotes around the query.
        """
        args = tool_call_str[len(cls._SEARCH_PREFIX):].strip()
        # Drop the closing parenthesis only if it is really there; blindly
        # cutting the last character would truncate the query otherwise.
        if args.endswith(")"):
            args = args[:-1].strip()
        if args.startswith("query="):
            args = args[len("query="):]
        return args.strip().strip('"').strip("'")

    def _run_tool(self, tool_call_str: str):
        """Execute the tool call and return the observation for the model.

        Never raises: unknown tools and tool failures are turned into an
        observation string so the model can react to them on the next turn.
        """
        if not tool_call_str.startswith(self._SEARCH_PREFIX):
            tool_output = f"Unknown tool: {tool_call_str}"
            print(f"Error: {tool_output}")
            return tool_output
        try:
            query = self._extract_search_query(tool_call_str)
            if not query:
                raise ValueError("empty search query")
            tool_output = search_tavily(query)
        except Exception as tool_e:  # best effort: report the failure to the model
            tool_output = f"Error running tool {tool_call_str}: {tool_e}"
            print(f"Error: {tool_output}")
            return tool_output
        # str() so the log preview cannot fail if the tool returns a
        # non-sliceable value (return type of search_tavily is not visible here).
        print(f"Tool result: {str(tool_output)[:200]}...")
        return tool_output

    def process_task(self, task_description: str) -> str:
        """Run the Thought/Tool/Observation/Answer loop for one task.

        Args:
            task_description: The question or task to solve.

        Returns:
            The text following ``Answer:`` in the model output, or a fallback
            message (with the tail of the interaction history) when no answer
            was produced within the allowed number of iterations.
        """
        prompt = self._PROMPT_TEMPLATE.format(task_description=task_description)
        max_iterations = self._MAX_ITERATIONS
        history = ""  # verified Thought/Tool/Observation turns so far

        for step in range(1, max_iterations + 1):
            # Ending on "Thought:" nudges the model to start reasoning.
            full_prompt = prompt + history + "\n\nThought:"
            print(
                f"[{step}/{max_iterations}] Generating response with prompt length: {len(full_prompt)}"
            )
            generated_text = self.text_generator(
                full_prompt,
                max_new_tokens=1024,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                temperature=0.7,
            )[0]["generated_text"]

            # The pipeline echoes the prompt; keep only the continuation.
            new_content = generated_text[len(full_prompt):].strip()
            print(f"DEBUG - Full generated_text: \n---START---\n{generated_text}\n---END---")
            print(f"DEBUG - Extracted new_content: '{new_content}'")

            answer_at = new_content.find(self._ANSWER_MARKER)
            tool_open_at = new_content.find(self._TOOL_OPEN)
            # Look for the closing tag only after the opening one.
            tool_close_at = (
                new_content.find(self._TOOL_CLOSE, tool_open_at)
                if tool_open_at != -1
                else -1
            )
            has_tool_call = tool_close_at != -1
            # An "Answer:" that follows a tool call in the same generation was
            # written before the tool ran, i.e. it rests on an observation the
            # model invented.  Run the tool instead - except on the last
            # iteration, where no later turn could use the real observation.
            tool_precedes_answer = has_tool_call and tool_open_at < answer_at
            is_last_step = step == max_iterations

            if answer_at != -1 and (is_last_step or not tool_precedes_answer):
                final_answer = new_content[answer_at + len(self._ANSWER_MARKER):].strip()
                print(f"Final answer from model:\n{final_answer}")
                return final_answer

            if has_tool_call:
                # Keep only the reasoning that led to the call; anything the
                # model wrote after the call (its own "Observation:") is dropped.
                thought_part = new_content[:tool_open_at]
                if "Thought:" in thought_part:
                    thought_part = thought_part.split("Thought:", 1)[1]
                thought_part = thought_part.split("Tool:", 1)[0].strip()

                tool_code_section = new_content[
                    tool_open_at:tool_close_at + len(self._TOOL_CLOSE)
                ]
                tool_call_str = (
                    tool_code_section.replace(self._TOOL_OPEN, "")
                    .replace(self._TOOL_CLOSE, "")
                    .strip()
                )
                print(f"Tool call detected: {tool_call_str}")

                tool_output = self._run_tool(tool_call_str)
                # Record the thought, the call, and the ACTUAL observation.
                history += (
                    f"\n\nThought: {thought_part}\nTool: {tool_code_section}"
                    f"\nObservation: {tool_output}\n"
                )
            else:
                # Neither a tool call nor an answer: keep what was generated
                # so the model can continue its reasoning on the next turn.
                history += f"\n\nThought: {new_content}\n"
                print(
                    f"Model generated non-tool/non-answer content. Appending: {new_content[:100]}..."
                )

        # Iteration budget exhausted without a final answer.
        return (
            "Agent could not complete the task within the allowed iterations. Latest relevant content: "
            + (history[-500:] if history else "No meaningful content generated.")
        )