Spaces:

mabelwang21
/

Agents_Final_Assignment

Sleeping

App Files Files Community

mabelwang21 committed on May 19, 2025

Commit

96229ca

1 Parent(s): fec32f4

add download_file tool, update add_files

Browse files

Files changed (1) hide show

agent.py +91 -16

agent.py CHANGED Viewed

@@ -5,6 +5,9 @@ import json
 import operator as op
 from pathlib import Path
 from typing import List, TypedDict, Annotated, Optional
 from langchain.tools import tool, StructuredTool
 from langchain_community.document_loaders import (
@@ -206,12 +209,53 @@ def python_interpreter(code: str) -> str:
     except Exception as e:
         return f"Error executing Python code: {e}"
 # Update tools list
 tools: List[StructuredTool] = [
     calculate, web_search, wikipedia_search, image_recognition,
     read_pdf, read_csv, read_spreadsheet, transcribe_audio,
     youtube_transcript_tool, youtube_transcript_api, read_jsonl,
-    python_interpreter  # Add python_interpreter
 ]
 class AgentState(TypedDict):
@@ -237,23 +281,45 @@ class MyAgent:
     def add_files(self, file_paths: List[str]):
         """
         Load and index documents for RAG based on file extensions or URLs.
-        Supports: PDF, CSV, audio (mp3/wav), and YouTube URLs.
         """
         for path in file_paths:
             ext = Path(path).suffix.lower()
-            if ext == ".csv":
-                loader = CSVLoader(path)
-                self.docs.extend(loader.load())
-            elif ext == ".pdf":
-                loader = PyPDFLoader(path)
-                self.docs.extend(loader.load())
-            elif ext in [".mp3", ".wav"]:
-                loader = AssemblyAIAudioTranscriptLoader(file_path=path)
-                self.docs.extend(loader.load())
-            elif "youtube" in path:
-                loader = YoutubeLoader.from_youtube_url(path)
-                self.docs.extend(loader.load())
-            else:
                 continue
     def build_retriever(self):
@@ -286,7 +352,16 @@ class MyAgent:
         # Use structured tool attributes
         tool_desc = "\n".join(f"{t.name}: {t.description}" for t in self.tools)
-        sys_msg = SystemMessage(content=f"{SYSTEM_PROMPT}\n\nTools:\n{tool_desc}")
         state["messages"].append(sys_msg)
         # Optionally load RAG docs

 import operator as op
 from pathlib import Path
 from typing import List, TypedDict, Annotated, Optional
+import requests
+from urllib.parse import urlparse
+import shutil
 from langchain.tools import tool, StructuredTool
 from langchain_community.document_loaders import (
     except Exception as e:
         return f"Error executing Python code: {e}"
# (connect, read) timeouts in seconds; without them a stalled server would
# block the agent indefinitely.
_DOWNLOAD_TIMEOUT = (10, 60)
# Size of each chunk streamed from the network to disk.
_DOWNLOAD_CHUNK_SIZE = 64 * 1024
# Name used when neither the response headers nor the URL yield a filename.
_FALLBACK_FILENAME = "downloaded_file"


def _pick_download_filename(content_disposition, url):
    """Return a bare, directory-free filename for a downloaded resource.

    Args:
        content_disposition: Raw ``Content-Disposition`` header value, or
            ``None`` when the server did not send one.
        url: The URL that was requested; its path is the fallback source.

    Returns:
        A filename with every directory component removed, or
        ``_FALLBACK_FILENAME`` when nothing usable could be derived.
    """
    # Local import keeps this block self-contained.
    from email.message import Message

    candidate = None
    if content_disposition:
        # Let the stdlib parse the header so quoting, trailing parameters
        # and RFC 2231 ``filename*=`` forms are handled.
        header = Message()
        header["Content-Disposition"] = content_disposition
        candidate = header.get_filename()
    if not candidate:
        candidate = urlparse(url).path

    # The name is server-controlled: keep only the final path component so
    # values such as "../../x" cannot escape the download directory.
    name = Path(candidate.replace("\\", "/")).name
    if name in ("", ".", ".."):
        return _FALLBACK_FILENAME
    return name


@tool
def download_file(url_or_path: str, save_dir: str = "./downloads") -> str:
    """Download a file from URL or copy from local path to the downloads directory."""
    # NOTE: the docstring above doubles as the tool description (the file
    # builds its prompt from `t.description`), so it is left unchanged and
    # the details live in comments instead.
    #
    # url_or_path: an http(s) URL to fetch, or a local path to copy.
    # save_dir:    destination directory, created when missing.
    # Returns a human-readable status string; failures are reported in the
    # returned string rather than raised.
    try:
        target_dir = Path(save_dir)
        target_dir.mkdir(parents=True, exist_ok=True)

        if url_or_path.startswith(("http://", "https://")):
            # The context manager releases the connection even on failure.
            with requests.get(
                url_or_path, stream=True, timeout=_DOWNLOAD_TIMEOUT
            ) as response:
                response.raise_for_status()
                filename = _pick_download_filename(
                    response.headers.get("Content-Disposition"), url_or_path
                )
                save_path = target_dir / filename
                with open(save_path, "wb") as out_file:
                    # iter_content undoes gzip/deflate transfer encoding;
                    # copying response.raw would store the encoded bytes.
                    for chunk in response.iter_content(
                        chunk_size=_DOWNLOAD_CHUNK_SIZE
                    ):
                        out_file.write(chunk)
        else:
            src_path = Path(url_or_path)
            if not src_path.exists():
                return f"Error: Source file {url_or_path} not found"
            save_path = target_dir / src_path.name
            # A file already sitting in the target directory needs no copy;
            # shutil.copy2 would raise SameFileError for it.
            if src_path.resolve() != save_path.resolve():
                shutil.copy2(src_path, save_path)
        return f"File successfully saved to {save_path}"
    except Exception as e:
        # Tool boundary: report the problem to the agent instead of raising.
        return f"Error downloading/copying file: {e}"
 # Every tool the agent can call, listed one per line so additions diff cleanly.
 tools: List[StructuredTool] = [
     calculate,
     web_search,
     wikipedia_search,
     image_recognition,
     read_pdf,
     read_csv,
     read_spreadsheet,
     transcribe_audio,
     youtube_transcript_tool,
     youtube_transcript_api,
     read_jsonl,
     python_interpreter,
     download_file,
 ]
 class AgentState(TypedDict):
     def add_files(self, file_paths: List[str]):
         """
         Load documents for RAG, choosing a loader per file extension or URL.

         Supports: PDF, CSV, Excel, JSONL, images, audio (mp3/wav), and
         YouTube URLs. Loaded documents are appended to ``self.docs``.

         Loading is best-effort: an unsupported or failing entry is reported
         with ``print`` and skipped, so one bad file does not stop the rest.

         Args:
             file_paths: Local file paths and/or YouTube URLs to ingest.
         """
         for path in file_paths:
             ext = Path(path).suffix.lower()
             try:
                 if ext == ".csv":
                     self.docs.extend(CSVLoader(path).load())
                 elif ext == ".pdf":
                     self.docs.extend(PyPDFLoader(path).load())
                 elif ext in (".xlsx", ".xls"):
                     # Imported lazily so pandas is only needed for spreadsheets.
                     import pandas as pd

                     # read_excel with default arguments reads a single sheet.
                     df = pd.read_excel(path)
                     self.docs.append(
                         Document(
                             page_content=df.to_string(),
                             metadata={"source": path},
                         )
                     )
                 elif ext == ".jsonl":
                     with open(path, "r", encoding="utf-8") as file:
                         # Skip blank lines (e.g. a trailing newline) rather
                         # than letting one abort the whole file.
                         records = [
                             json.loads(line) for line in file if line.strip()
                         ]
                     self.docs.append(
                         Document(
                             page_content=json.dumps(records, indent=2),
                             metadata={"source": path},
                         )
                     )
                 elif ext in (".png", ".jpg", ".jpeg"):
                     # Close the image handle once OCR has finished.
                     with Image.open(path) as image:
                         text = pytesseract.image_to_string(image)
                     # Images with no recognisable text add nothing to the index.
                     if text.strip():
                         self.docs.append(
                             Document(
                                 page_content=text,
                                 metadata={"source": path},
                             )
                         )
                 elif ext in (".mp3", ".wav"):
                     loader = AssemblyAIAudioTranscriptLoader(file_path=path)
                     self.docs.extend(loader.load())
                 elif "youtube" in path or "youtu.be" in path:
                     # Also accept short youtu.be links.
                     loader = YoutubeLoader.from_youtube_url(path)
                     self.docs.extend(loader.load())
                 else:
                     print(f"Unsupported file type: {ext}")
             except Exception as e:
                 # Per-file boundary: report the failure and move on.
                 print(f"Error loading {path}: {e}")
     def build_retriever(self):
         # Use structured tool attributes
         tool_desc = "\n".join(f"{t.name}: {t.description}" for t in self.tools)
+        # Enhanced system prompt with RAG guidance
+        rag_prompt = """
+        If the question seems to be about any loaded documents, ALWAYS:
+        1. Use the rag_search tool first to find relevant information
+        2. Base your answer on the retrieved content
+        3. If no relevant content is found, say so
+        """
+        sys_msg = SystemMessage(content=f"{SYSTEM_PROMPT}\n\n{rag_prompt if file_paths else ''}\n\nTools:\n{tool_desc}")
         state["messages"].append(sys_msg)
         # Optionally load RAG docs