Final_Assignment_Template

Sleeping

App Files Files Community

iammartian0 committed on Jan 18

Commit

cde9caa

verified ·

1 Parent(s): 81917a3

Create agent.py

Browse files

Files changed (1) hide show

agent.py +236 -0

agent.py ADDED Viewed

	@@ -0,0 +1,236 @@

+"""
+Cerebras-powered Research Agent for GAIA-style questions
+"""
+import os
+from cerebras.cloud.sdk import Cerebras
+from tavily import TavilyClient
+class WebSearchTool:
+    """Search the web using Tavily"""
+    def __init__(self, api_key: str):
+        self.client = TavilyClient(api_key=api_key)
+    def search(self, query: str, max_results: int = 5) -> str:
+        try:
+            response = self.client.search(
+                query=query,
+                search_depth="advanced",
+                max_results=max_results,
+                include_answer=True
+            )
+            output = []
+            if response.get("answer"):
+                output.append(f"Quick Answer: {response['answer']}\n")
+            output.append("Search Results:")
+            for i, result in enumerate(response.get("results", []), 1):
+                output.append(f"\n{i}. {result['title']}")
+                output.append(f"   {result['content'][:200]}...")
+            return "\n".join(output)
+        except Exception as e:
+            return f"Search error: {str(e)}"
+class FileReaderTool:
+    """Read various file formats"""
+    def read(self, file_path: str) -> str:
+        if not os.path.exists(file_path):
+            return f"Error: File not found"
+        ext = os.path.splitext(file_path)[1].lower()
+        try:
+            if ext == '.docx':
+                from docx import Document
+                doc = Document(file_path)
+                text = []
+                for para in doc.paragraphs:
+                    if para.text.strip():
+                        text.append(para.text)
+                for table in doc.tables:
+                    for row in table.rows:
+                        cells = [cell.text.strip() for cell in row.cells]
+                        text.append(" | ".join(cells))
+                return "\n".join(text)
+            elif ext == '.pdf':
+                import pdfplumber
+                with pdfplumber.open(file_path) as pdf:
+                    text = []
+                    for page in pdf.pages:
+                        if page.extract_text():
+                            text.append(page.extract_text())
+                    return "\n".join(text)
+            elif ext in ['.xlsx', '.xls', '.csv']:
+                import pandas as pd
+                df = pd.read_csv(file_path) if ext == '.csv' else pd.read_excel(file_path)
+                return df.to_string()
+            elif ext in ['.txt', '.md', '.json']:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    return f.read()
+            else:
+                return f"Unsupported file type: {ext}"
+        except Exception as e:
+            return f"Error reading file: {str(e)}"
+class ImageAnalysisTool:
+    """Analyze images using OCR"""
+    def analyze(self, image_path: str) -> str:
+        if not os.path.exists(image_path):
+            return "Error: Image not found"
+        try:
+            import pytesseract
+            from PIL import Image
+            img = Image.open(image_path)
+            text = pytesseract.image_to_string(img)
+            return f"OCR text:\n{text}" if text.strip() else "No text found"
+        except ImportError:
+            return "Error: pytesseract not installed"
+        except Exception as e:
+            return f"Error: {str(e)}"
+class ResearchAgent:
+    """
+    Cerebras-powered research agent
+    Features:
+    - Web search via Tavily
+    - File reading (PDF, DOCX, CSV, Excel, TXT)
+    - Image OCR
+    - Fast inference via Cerebras
+    """
+    def __init__(
+        self,
+        cerebras_api_key: str = None,
+        tavily_api_key: str = None,
+        model: str = "llama3.1-70b"
+    ):
+        """
+        Initialize agent
+        Args:
+            cerebras_api_key: Cerebras API key (or from env)
+            tavily_api_key: Tavily API key (or from env)
+            model: Cerebras model to use
+        """
+        print("🤖 Initializing Research Agent...")
+        # Get API keys
+        self.cerebras_key = cerebras_api_key or os.getenv("CEREBRAS_API_KEY")
+        self.tavily_key = tavily_api_key or os.getenv("TAVILY_API_KEY")
+        if not self.cerebras_key:
+            raise ValueError("CEREBRAS_API_KEY not found")
+        if not self.tavily_key:
+            raise ValueError("TAVILY_API_KEY not found")
+        # Initialize LLM
+        self.llm = Cerebras(api_key=self.cerebras_key)
+        self.model = model
+        # Initialize tools
+        self.web_search = WebSearchTool(self.tavily_key)
+        self.file_reader = FileReaderTool()
+        self.image_analyzer = ImageAnalysisTool()
+        print("✅ Agent ready")
+    def _call_llm(self, messages: list) -> str:
+        """Call Cerebras LLM"""
+        try:
+            response = self.llm.chat.completions.create(
+                model=self.model,
+                messages=messages,
+                temperature=0.1,
+                max_tokens=2000
+            )
+            return response.choices[0].message.content.strip()
+        except Exception as e:
+            raise RuntimeError(f"LLM error: {str(e)}")
+    def answer(self, question: str, file_path: str = None) -> str:
+        """
+        Answer a question
+        Args:
+            question: The question
+            file_path: Optional file to analyze
+        Returns:
+            Answer string
+        """
+        print(f"📝 Question: {question[:80]}...")
+        # Detect question type
+        is_logic = any(kw in question.lower() for kw in [
+            'opposite', 'backwards', 'reversed'
+        ])
+        # Gather context
+        context_parts = []
+        if file_path:
+            ext = os.path.splitext(file_path)[1].lower()
+            if ext in ['.png', '.jpg', '.jpeg', '.gif', '.bmp']:
+                content = self.image_analyzer.analyze(file_path)
+            else:
+                content = self.file_reader.read(file_path)
+            context_parts.append(f"File:\n{content}")
+        if not is_logic and not file_path:
+            print("  🔍 Searching web...")
+            search = self.web_search.search(question)
+            context_parts.append(f"Search:\n{search}")
+        context = "\n\n".join(context_parts) if context_parts else "Use knowledge."
+        # Create prompt
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    "You are an expert researcher. "
+                    "Think step-by-step. "
+                    "Provide ONLY the exact answer - no explanations."
+                )
+            },
+            {
+                "role": "user",
+                "content": f"""Context:
+{context}
+Question: {question}
+Analyze and provide only the final answer:"""
+            }
+        ]
+        # Get answer
+        answer = self._call_llm(messages)
+        # Clean answer
+        answer = answer.strip()
+        for prefix in ["Answer:", "The answer is:", "Final answer:"]:
+            if answer.lower().startswith(prefix.lower()):
+                answer = answer[len(prefix):].strip()
+        print(f"  ✅ Answer: {answer[:80]}...")
+        return answer
+    def __call__(self, question: str, file_path: str = None) -> str:
+        """Allow agent(question) syntax"""
+        return self.answer(question, file_path)