iammartian0 committed on
Commit
53cf0f3
·
verified ·
1 Parent(s): 3c3ef32

Create agent.py

Browse files
Files changed (1) hide show
  1. agent.py +220 -0
agent.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from dotenv import load_dotenv
4
+ from tavily import TavilyClient
5
+ from cerebras.cloud.sdk import Cerebras
6
+
7
+ load_dotenv()
8
+
9
+ # --- HELPER TOOLS ---
10
+
11
class WebSearchTool:
    """Search the web through a Tavily client and format the hits as text."""

    # Maximum number of characters of each result's content kept in the output.
    SNIPPET_CHARS = 300

    def __init__(self, api_key: str):
        """Create the underlying ``TavilyClient`` with the given API key."""
        self.client = TavilyClient(api_key=api_key)

    def search(self, query: str, max_results: int = 5) -> str:
        """Run an advanced-depth search and return the results as plain text.

        Args:
            query: Search query, forwarded unchanged to the client.
            max_results: Upper bound on results requested from the client.

        Returns:
            A newline-joined report: an optional "Quick Answer" line followed
            by a numbered list with title, URL and a content snippet for each
            result. If the client call or the formatting raises, the string
            ``"Search error: <message>"`` is returned instead of raising.
        """
        try:
            response = self.client.search(
                query=query,
                search_depth="advanced",
                max_results=max_results,
                include_answer=True,
            )

            lines = []

            answer = response.get("answer")
            if answer:
                lines.append(f"Quick Answer: {answer}\n")

            lines.append("Search Results:")
            # "or []" also covers a response whose "results" value is null.
            for rank, hit in enumerate(response.get("results") or [], 1):
                lines.extend(self._format_hit(rank, hit))

            return "\n".join(lines)

        except Exception as e:
            return f"Search error: {e}"

    def _format_hit(self, rank: int, hit: dict) -> list:
        """Return the three output lines (title, URL, snippet) for one result.

        Missing or empty fields fall back to placeholders so that a single
        incomplete result does not discard the whole search report.
        """
        title = hit.get("title") or "Untitled"
        url = hit.get("url") or "N/A"
        content = hit.get("content") or ""

        snippet = content[:self.SNIPPET_CHARS]
        # Only mark the snippet as cut off when text was actually dropped.
        if len(content) > self.SNIPPET_CHARS:
            snippet += "..."

        return [f"\n{rank}. {title}", f" URL: {url}", f" {snippet}"]
43
+
44
class FileReaderTool:
    """Read supported document formats from disk and return their text.

    Supported extensions (case-insensitive): ``.docx``, ``.pdf``, ``.xlsx``,
    ``.xls``, ``.csv``, ``.txt``, ``.md`` and ``.json``. Parsers for the
    binary formats are imported lazily, so a missing optional package is
    reported only when a file of that type is actually read.
    """

    # Extensions parsed into a DataFrame via pandas.
    SPREADSHEET_EXTENSIONS = ('.xlsx', '.xls', '.csv')
    # Extensions returned verbatim, decoded as UTF-8.
    TEXT_EXTENSIONS = ('.txt', '.md', '.json')

    def read(self, file_path: str) -> str:
        """Read ``file_path`` and return its content as text.

        Args:
            file_path: Path of the file to read; the parser is chosen from
                the lower-cased file extension.

        Returns:
            The extracted text. Failures never raise: a missing file (or a
            path that is not a regular file), an unsupported extension, a
            missing optional parser package, or any read/parse error is
            reported as a human-readable string.
        """
        # isfile (not exists) so that a directory is rejected up front.
        if not os.path.isfile(file_path):
            return f"Error: File not found at {file_path}"

        ext = os.path.splitext(file_path)[1].lower()

        try:
            if ext == '.docx':
                return self._read_docx(file_path)
            if ext == '.pdf':
                return self._read_pdf(file_path)
            if ext in self.SPREADSHEET_EXTENSIONS:
                return self._read_spreadsheet(file_path, ext)
            if ext in self.TEXT_EXTENSIONS:
                with open(file_path, 'r', encoding='utf-8') as f:
                    return f.read()
            return f"Unsupported file type: {ext}"
        except Exception as e:
            return f"Error reading file: {e}"

    @staticmethod
    def _read_docx(file_path: str) -> str:
        """Return non-blank paragraphs, then table rows joined with " | "."""
        try:
            from docx import Document
        except ImportError:
            return "Error: python-docx not installed."

        doc = Document(file_path)
        text = [para.text for para in doc.paragraphs if para.text.strip()]
        for table in doc.tables:
            for row in table.rows:
                text.append(" | ".join(cell.text.strip() for cell in row.cells))
        return "\n".join(text)

    @staticmethod
    def _read_pdf(file_path: str) -> str:
        """Return the text of every page that yields any, one page per line."""
        try:
            import pdfplumber
        except ImportError:
            return "Error: pdfplumber not installed."

        with pdfplumber.open(file_path) as pdf:
            # Extract each page exactly once; pages with no text are skipped.
            page_texts = (page.extract_text() for page in pdf.pages)
            return "\n".join(text for text in page_texts if text)

    @staticmethod
    def _read_spreadsheet(file_path: str, ext: str) -> str:
        """Load a CSV/Excel file with pandas and return ``DataFrame.to_string()``."""
        try:
            import pandas as pd
            # The read stays inside the try: an ImportError raised while
            # reading (e.g. a missing Excel engine) gets the same message.
            if ext == '.csv':
                df = pd.read_csv(file_path)
            else:
                df = pd.read_excel(file_path)
        except ImportError:
            return "Error: pandas or openpyxl not installed."
        return df.to_string()
101
+
102
class ImageAnalysisTool:
    """Extract text from images with OCR (pytesseract + Pillow)."""

    def analyze(self, image_path: str, question: str = "Describe this image") -> str:
        """Run OCR on the image at ``image_path`` and return the text found.

        Args:
            image_path: Path of the image file to analyze.
            question: Accepted for interface compatibility; the OCR path does
                not use it.

        Returns:
            The extracted text prefixed with a short header, a notice when
            OCR finds no text, or an error string when the file is missing,
            the OCR packages are not installed, or analysis fails. This
            method does not raise for those cases.
        """
        if not os.path.exists(image_path):
            return f"Error: Image not found at {image_path}"

        # Imported lazily so the agent still loads without the OCR packages.
        try:
            import pytesseract
            from PIL import Image
        except ImportError:
            return "Error: pytesseract or Pillow not installed."

        try:
            # The context manager closes the image's file handle once OCR
            # is done instead of leaving it open until garbage collection.
            with Image.open(image_path) as img:
                text = pytesseract.image_to_string(img)
        except Exception as e:
            return f"Error analyzing image: {e}"

        if text.strip():
            return f"Text extracted from image:\n{text}"
        return "No text found in image (OCR returned empty)"
126
+
127
+ # --- MAIN AGENT CLASS ---
128
+
129
class BasicAgent:
    """Question-answering agent: optional web search followed by one LLM call.

    Renamed from SimpleResearchAgent to match app.py requirements.
    Instances are callable: ``agent(question)`` is ``agent.answer(question)``.
    """

    DEFAULT_MODEL = "gpt-oss-120b"  # Or "llama3.1-8b"
    # Cap on generated tokens, to prevent long rambling.
    DEFAULT_MAX_TOKENS = 200

    # Substrings (matched case-insensitively) that mark a question as a
    # logic/trick puzzle for which the web search is skipped.
    LOGIC_KEYWORDS = (
        'opposite', 'backwards', 'reversed', 'if you understand', 'python code',
    )

    # GAIA-style system prompt used for strictness.
    SYSTEM_PROMPT = (
        "You are a precise data extraction engine. "
        "Answer with ONLY the exact value requested. "
        "No explanations, no preambles, no conversational filler. "
        "Examples: '42', 'John Smith', 'Paris', 'right'. "
    )

    def __init__(self, model: str = DEFAULT_MODEL, max_tokens: int = DEFAULT_MAX_TOKENS):
        """Load API keys from the environment and build the LLM and tools.

        Args:
            model: Model name passed to the chat completion call.
            max_tokens: Token cap passed to the chat completion call.

        Raises:
            ValueError: If ``CEREBRAS_API_KEY`` or ``TAVILY_API_KEY`` is unset.
        """
        print("--- Initializing BasicAgent ---")

        # 1. Load keys internally
        self.hf_token = os.getenv("HF_TOKEN")
        self.cerebras_key = os.getenv("CEREBRAS_API_KEY")
        self.tavily_key = os.getenv("TAVILY_API_KEY")

        if not self.cerebras_key or not self.tavily_key:
            raise ValueError("❌ Missing API Keys. Please check Space Settings.")

        # 2. Initialize LLM
        self.llm = Cerebras(api_key=self.cerebras_key)
        self.model = model
        self.max_tokens = max_tokens

        # 3. Initialize tools
        self.web_search = WebSearchTool(self.tavily_key)
        self.file_reader = FileReaderTool()
        self.image_analyzer = ImageAnalysisTool()

        print("✅ BasicAgent initialized successfully.")

    def _call_llm(self, messages: list, temperature: float = 0.0) -> str:
        """Send ``messages`` to the chat model and return the stripped reply.

        Returns ``"Error: Empty response."`` when the reply has no content
        and ``"LLM Error: <message>"`` when the call raises; it never raises.
        """
        try:
            response = self.llm.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=temperature,
                max_tokens=self.max_tokens,
            )
            content = response.choices[0].message.content
            return content.strip() if content else "Error: Empty response."
        except Exception as e:
            return f"LLM Error: {e}"

    def answer(self, question: str, mode="context") -> str:
        """Answer ``question`` with a short, exact value.

        Main method called by app.py.
        Note: app.py only passes 'question', not 'file_path'.

        Args:
            question: The question text.
            mode: Accepted for interface compatibility; currently unused.

        Returns:
            The model's reply, or an error string. An empty or
            whitespace-only question returns ``"Error: Empty question."``
            without performing a search or an LLM call.
        """
        if not question or not question.strip():
            return "Error: Empty question."

        print(f"Processing: {question[:50]}...")

        # 1. Detect if this is a logic/trick question (GAIA style)
        lowered = question.lower()
        is_logic = any(keyword in lowered for keyword in self.LOGIC_KEYWORDS)

        # 2. Search the web (skipped if it's purely a logic puzzle)
        if is_logic:
            context = "Logic/Reasoning Task (No Search Performed)"
        else:
            search_results = self.web_search.search(question)
            context = f"Web Search Results:\n{search_results}"

        # 3. Build the chat messages and query the model
        messages = [
            {"role": "system", "content": self.SYSTEM_PROMPT},
            {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion: {question}\n\nExact Answer:",
            },
        ]

        return self._call_llm(messages)

    def __call__(self, question: str) -> str:
        """Make the agent callable; delegates to :meth:`answer`."""
        return self.answer(question)
216
+
217
# Local smoke test: runs only when this file is executed as a script.
if __name__ == "__main__":
    demo_agent = BasicAgent()
    demo_reply = demo_agent("What is the capital of France?")
    print(demo_reply)