Final_Assignment_Template

Sleeping

App Files Files Community

Prasanthkumar committed on Jul 11, 2025

Commit

f64893d

verified ·

1 Parent(s): 2dd54e3

Upload 4 files

Browse files

Files changed (4) hide show

tools/Web_Search_tools.py +50 -0
tools/calculator.py +136 -0
tools/code_interpreter_tools.py +339 -0
tools/document_parser.py +160 -0

tools/Web_Search_tools.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import os
+from supabase.client import Client, create_client
+from langchain_core.tools import tool
+from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain_community.document_loaders import WikipediaLoader
+from langchain_community.document_loaders import ArxivLoader
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import SupabaseVectorStore
+from langchain.tools.retriever import create_retriever_tool
+@tool
+def wiki_search(query: str) -> str:
+    """Search Wikipedia for a query and return maximum 2 results.
+    Args:
+        query: The search query."""
+    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
+    formatted_search_docs = "\n\n---\n\n".join([f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>' for doc in search_docs])
+    return {"wiki_results": formatted_search_docs}
+@tool
+def web_search(query: str) -> str:
+    """Search Tavily for a query and return maximum 3 results.
+    Args:
+        query: The search query."""
+    search_docs = TavilySearchResults(max_results=3).invoke(query=query)
+    formatted_search_docs = "\n\n---\n\n".join([f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>' for doc in search_docs])
+    return {"web_results": formatted_search_docs}
+@tool
+def arxiv_search(query: str) -> str:
+    """Search Arxiv for a query and return maximum 3 result.
+    Args:
+        query: The search query."""
+    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
+    formatted_search_docs = "\n\n---\n\n".join([f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>' for doc in search_docs])
+    return {"arxiv_results": formatted_search_docs}
+@tool
+def similar_question_search(question: str) -> str:
+    """Search the vector database for similar questions and return the first results.
+    Args:
+        question: the question human provided."""
+    matched_docs = vector_store.similarity_search(question, 3)
+    formatted_search_docs = "\n\n---\n\n".join([f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>' for doc in matched_docs])
+    return {"similar_questions": formatted_search_docs}

tools/calculator.py ADDED Viewed

	@@ -0,0 +1,136 @@

+from langchain_core.tools import tool
+import cmath
+import math
+@tool
+def add(a: int, b: int) -> int:
+    """
+    Adds two numbers.
+    Args:
+        a (integer): the first number
+        b (integer): the second number
+    """
+    return a + b
+@tool
+def sub(a: int, b: int) -> int:
+    """
+    Subracts two numbers.
+    Args:
+        a (integer): the first number
+        b (integer): the second number
+    """
+    return a - b
+@tool
+def mul(a: int, b: int) -> int:
+    """
+    multiplies two numbers.
+    Args:
+        a (integer): the first number
+        b (integer): the second number
+    """
+    return a * b
+@tool
+def div(a: int, b: int) -> float:
+    """
+    divides two numbers and gave float as a result
+    Args:
+        a (integer): the first number
+        b (integer): the second number
+    """
+    return a / b
+@tool
+def floor_div(a: int, b: int) -> int:
+    """
+    divides two numbers and gave integr as a result
+    Args:
+        a (integer): the first number
+        b (integer): the second number
+    """
+    return a // b
+@tool
+def square(a: int) -> int:
+    """
+    returns square of the number
+    Args:
+        a (integer): the number
+    """
+    return a * a
+@tool
+def mod(a: int, b: int) -> int:
+    """
+    Modulus of two numbers.
+    Args:
+        a (integer): the first number
+        b (integer): the second number
+    """
+    return a % b
+@tool
+def pow(a: int, b: int) -> int:
+    """
+    Get the power of two numbers
+    Args:
+        a (integer): the first number
+        b (integer): the second number
+    """
+    return a ** b
+@tool
+def square_root(a: int):
+    """
+    Square root of the number
+    Args:
+        a (integer): the number
+    """
+    if a < 0:
+        return cmath.sqrt(a)
+    else:
+        return a ** 0.5
+@tool
+def absolute(a: int) -> int:
+    """
+    returns absolute value of the number
+    Args:
+        a (integer): the number
+    """
+    return a if a >= 0 else -a
+@tool
+def gcd(a: int, b: int) -> int:
+    """
+    returns gcd of two numbers using recursion
+    Args:
+        a (integer): the first number
+        b (integer): the second number
+    """
+    if b == 0:
+        return a
+    return gcd(b, a % b)
+@tool
+def lcm(a: int, b: int) -> int:
+    """
+    returns lcm of two numbers
+    Args:
+        a (integer): the first number
+        b (integer): the second number
+    """
+    return absolute(a * b) // gcd(a, b)
+@tool
+def factorial(a: int) -> int:
+    """
+    returns factorial of a number
+    Args:
+        a (integer): the number
+    """
+    if a <= 1:
+        return 1
+    return a * factorial(a - 1)

tools/code_interpreter_tools.py ADDED Viewed

	@@ -0,0 +1,339 @@

+# ========================== #
+# 📦 Imports and Setup
+# ========================== #
+import os
+import io
+import sys
+import uuid
+import base64
+import traceback
+import contextlib
+import tempfile
+import subprocess
+import sqlite3
+import logging
+from typing import Dict, Any
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from PIL import Image
+from langchain_core.tools import tool
+# ========================== #
+# 📋 Logging Setup
+# ========================== #
+def setup_logger(log_file="execution.log"):
+    logger = logging.getLogger("CodeInterpreter")
+    logger.setLevel(logging.INFO)
+    if not logger.handlers:
+        handler = logging.FileHandler(log_file)
+        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+    return logger
+logger = setup_logger()
+# =================================================================== #
+# Code interpreter tools for Python, Java, C, SQL (SQLite) and Bash
+# =================================================================== #
+class Code_Interpreter:
+    def __init__ (
+        self,
+        allowed_modules = None,
+        max_execution_time = 30,
+        working_directory = None
+    )
+        self.allowed_modules = allowed_modules or [
+                "numpy", "pandas", "matplotlib", "scipy", "sklearn", "math", "random", "statistics",
+                "datetime", "collections", "itertools", "functools", "operator", "re", "json", "sympy",
+                "networkx", "nltk", "PIL", "pytesseract", "cmath", "uuid", "tempfile", "requests", "urllib"
+            ]
+        self.max_execution_time = max_execution_time
+        self.working_directory = working_directory or os.path.join(os.getcwd())
+        if not os.path.exists(self.working_directory):
+            os.makedirs(self.working_directory)
+        self.globals = {"__builtins__": __builtins__, "np": np, "pd": pd, "plt": plt, "Image": Image}
+        self.temp_sqlite_db = os.path.join(tempfile.gettempdir(), "code_exec.db")
+    def execute_code(self, code: str, language: str = "python") -> Dict[str, Any]:
+        """Dispatch execution to the appropriate language handler."""
+        lang = langauge.lower()
+        execution_id = str(uuid.uuid4())
+        logger.info(f"[{execution_id}] Executing code in language: {lang}")
+        result = {
+            "execution_id": execution_id,
+            "status": "error",
+            "stdout": "",
+            "stderr": "",
+            "result": None,
+            "plots": [],
+            "dataframes": []
+        }
+        try:
+            if lang == "python":
+                if any(x in code for x in ["os.remove", "shutil.rmtree", "open('/etc", "__import__"]):
+                    raise ValueError("Unsafe code detected.")
+                return self._execute_python(code, execution_id)
+            elif lang == "java":
+                return self._execute_java(code, execution_id)
+            elif lang == "c":
+                return self._execute_c(code, execution_id)
+            elif lang == "sql":
+                return self._execute_sql(code, execution_id)
+            elif lang == "bash":
+                return self._execute_bash(code, execution_id)
+        except Exception as e:
+            result["stderr"] = str(e)
+            logger.error(f"[{execution_id}] Execution error: {e}", exc_info=True)
+        return result
+    def _execute_python(self, code: str, execution_id: str) -> dict:
+        """Execute Python code safely with stdout/stderr capture and plot handling."""
+        output_buffer = io.StringIO()
+        error_buffer = io.StringIO()
+        result = {
+            "execution_id": execution_id,
+            "status": "error",
+            "stdout": "",
+            "stderr": "",
+            "result": None,
+            "plots": [],
+            "dataframes": []
+        }
+        try:
+            exec_dir = os.path.join(self.working_directory, execution_id)
+            os.makedirs(exec_dir, exist_ok=True)
+            plt.switch_backend('Agg')
+            with contextlib.redirect_stdout(output_buffer), contextlib.redirect_stderr(error_buffer):
+                exec_result = exec(code, self.globals)
+                # Capture plots
+                if plt.get_fignums():
+                    for i, fig_num in enumerate(plt.get_fignums()):
+                        fig = plt.figure(fig_num)
+                        img_path = os.path.join(exec_dir, f"plot_{i}.png")
+                        fig.savefig(img_path)
+                        with open(img_path, "rb") as img_file:
+                            img_data = base64.b64encode(img_file.read()).decode('utf-8')
+                            result["plots"].append({"figure_number": fig_num, "data": img_data})
+                # Capture dataframes
+                for var_name, var_value in self.globals.items():
+                    if isinstance(var_value, pd.DataFrame) and len(var_value) > 0:
+                        result["dataframes"].append({
+                            "name": var_name,
+                            "head": var_value.head().to_dict(),
+                            "shape": var_value.shape,
+                            "dtypes": str(var_value.dtypes)
+                        })
+            result["status"] = "success"
+            result["stdout"] = output_buffer.getvalue()
+            result["result"] = exec_result
+            logger.info(f"[{execution_id}] Python code executed successfully.")
+        except Exception as e:
+            result["status"] = "error"
+            result["stderr"] = error_buffer.getvalue() + "\n" + traceback.format_exc()
+            logger.error(f"[{execution_id}] Python execution failed: {e}", exc_info=True)
+        return result
+    def _execute_java(self, code: str, execution_id: str) -> dict:
+        temp_dir = tempfile.mkdtemp()
+        source_path = os.path.join(temp_dir, "Main.java")
+        try:
+            with open(source_path, "w") as f:
+                f.write(code)
+            compile_proc = subprocess.run(["javac", source_path], capture_output=True, text=True, timeout=self.max_execution_time)
+            if compile_proc.returncode != 0:
+                return {
+                    "execution_id": execution_id,
+                    "status": "error",
+                    "stdout": compile_proc.stdout,
+                    "stderr": compile_proc.stderr,
+                    "result": None,
+                    "plots": [],
+                    "dataframes": []
+                }
+            run_proc = subprocess.run(["java", "-cp", temp_dir, "Main"], capture_output=True, text=True, timeout=self.max_execution_time)
+            return {
+                "execution_id": execution_id,
+                "status": "success" if run_proc.returncode == 0 else "error",
+                "stdout": run_proc.stdout,
+                "stderr": run_proc.stderr,
+                "result": None,
+                "plots": [],
+                "dataframes": []
+            }
+        except Exception as e:
+            return {
+                "execution_id": execution_id,
+                "status": "error",
+                "stdout": "",
+                "stderr": str(e),
+                "result": None,
+                "plots": [],
+                "dataframes": []
+            }
+    def _execute_c(self, code: str, execution_id: str) -> dict:
+        temp_dir = tempfile.mkdtemp()
+        source_path = os.path.join(temp_dir, "program.c")
+        binary_path = os.path.join(temp_dir, "program")
+        try:
+            with open(source_path, "w") as f:
+                f.write(code)
+            compile_proc = subprocess.run(["gcc", source_path, "-o", binary_path], capture_output=True, text=True, timeout=self.max_execution_time)
+            if compile_proc.returncode != 0:
+                return {
+                    "execution_id": execution_id,
+                    "status": "error",
+                    "stdout": compile_proc.stdout,
+                    "stderr": compile_proc.stderr,
+                    "result": None,
+                    "plots": [],
+                    "dataframes": []
+                }
+            run_proc = subprocess.run([binary_path], capture_output=True, text=True, timeout=self.max_execution_time)
+            return {
+                "execution_id": execution_id,
+                "status": "success" if run_proc.returncode == 0 else "error",
+                "stdout": run_proc.stdout,
+                "stderr": run_proc.stderr,
+                "result": None,
+                "plots": [],
+                "dataframes": []
+            }
+        except Exception as e:
+            return {
+                "execution_id": execution_id,
+                "status": "error",
+                "stdout": "",
+                "stderr": str(e),
+                "result": None,
+                "plots": [],
+                "dataframes": []
+            }
+    def _execute_sql(self, code: str, execution_id: str) -> dict:
+        result = {
+            "execution_id": execution_id,
+            "status": "error",
+            "stdout": "",
+            "stderr": "",
+            "result": None,
+            "plots": [],
+            "dataframes": []
+        }
+        try:
+            conn = sqlite3.connect(self.temp_sqlite_db)
+            cur = conn.cursor()
+            cur.execute(code)
+            if code.strip().lower().startswith("select"):
+                columns = [desc[0] for desc in cur.description]
+                rows = cur.fetchall()
+                df = pd.DataFrame(rows, columns=columns)
+                result["dataframes"].append({
+                    "name": "query_result",
+                    "head": df.head().to_dict(),
+                    "shape": df.shape,
+                    "dtypes": str(df.dtypes)
+                })
+            else:
+                conn.commit()
+            result["status"] = "success"
+            result["stdout"] = "Query executed successfully."
+        except Exception as e:
+            result["stderr"] = str(e)
+            logger.error(f"[{execution_id}] SQL execution failed: {e}", exc_info=True)
+        finally:
+            conn.close()
+        return result
+    def _execute_bash(self, code: str, execution_id: str) -> dict:
+        try:
+            completed = subprocess.run(code, shell=True, capture_output=True, text=True, timeout=self.max_execution_time)
+            return {
+                "execution_id": execution_id,
+                "status": "success" if completed.returncode == 0 else "error",
+                "stdout": completed.stdout,
+                "stderr": completed.stderr,
+                "result": None,
+                "plots": [],
+                "dataframes": []
+            }
+        except subprocess.TimeoutExpired:
+            return {
+                "execution_id": execution_id,
+                "status": "error",
+                "stdout": "",
+                "stderr": "Execution timed out.",
+                "result": None,
+                "plots": [],
+                "dataframes": []
+            }
+# ================================== #
+# LangChain tool
+# ================================== #
+interpreter = Code_Interpreter()
+@tool
+def execute_code_multilang(code: str, language: str = "python") -> str:
+    """
+    Execute code in multiple languages (Python, Bash, SQL, C, Java) and return results.
+    Args:
+        code (str): the source code to execute
+        language (str): the language of the code
+    """
+    result = interpreter_instance.execute_code(code, language)
+    response = []
+    if result["status"] == "success":
+        response.append(f"✅ Code executed successfully in **{language.upper()}**")
+        if result.get("stdout"):
+            response.append("\n**Standard Output:**\n```\n" + result["stdout"].strip() + "\n```")
+        if result.get("stderr"):
+            response.append("\n**Standard Error (if any):**\n```\n" + result["stderr"].strip() + "\n```")
+        if result.get("dataframes"):
+            for df in result["dataframes"]:
+                preview = pd.DataFrame(df["head"])
+                response.append(f"\n**DataFrame `{df['name']}` (Shape: {df['shape']})**\n```\n{preview}\n```")
+        if result.get("plots"):
+            response.append(f"\n🖼️ {len(result['plots'])} plot(s) generated (encoded)")
+    else:
+        response.append(f"❌ Code execution failed in **{language.upper()}**")
+        if result.get("stderr"):
+            response.append("\n**Error Log:**\n```\n" + result["stderr"].strip() + "\n```")
+    return "\n".join(response)

tools/document_parser.py ADDED Viewed

	@@ -0,0 +1,160 @@

+import os
+import uuid
+import requests
+import tempfile
+from PIL import Image
+import pytesseract
+import pandas as pd
+from urllib.parse import urlparse
+from langchain_core.tools import tool
+from typing import Optional
+import logging
+import pandasql as psql
+# ------------------- 🔧 Logger Setup -------------------
+def setup_logger():
+    logger = logging.getLogger("FileToolLogger")
+    logger.setLevel(logging.INFO)
+    if not logger.handlers:
+        handler = logging.StreamHandler()
+        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+    return logger
+logger = setup_logger()
+# ------------------- 📄 Save Content to File -------------------
+@tool
+def save_and_read_file(content: str, filename: Optional[str] = None) -> str:
+    """
+    Save content to a file and return the path.
+    Args:
+        content (str): the content to save to the file
+        filename (str, optional): the name of the file. If not provided, a random name file will be created.
+    """
+    temp_dir = tempfile.gettempdir()
+    if filename is None:
+        temp_file = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir)
+        filepath = temp_file.name
+    else:
+        filepath = os.path.join(temp_dir, filename)
+    with open(filepath, "w") as f:
+        f.write(content)
+    return f"File saved to {filepath}. You can read this file to process its contents."
+# ------------------- 🌐 Download File from URL -------------------
+@tool
+def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
+    """
+    Download a file from a URL and save it to a temporary location.
+    Args:
+        url (str): the URL of the file to download.
+        filename (str, optional): the name of the file. If not provided, a random name file will be created.
+    """
+    try:
+        # Parse URL to get filename if not provided
+        if not filename:
+            path = urlparse(url).path
+            filename = os.path.basename(path)
+            if not filename:
+                filename = f"downloaded_{uuid.uuid4().hex[:8]}"
+        # Create temporary file
+        temp_dir = tempfile.gettempdir()
+        filepath = os.path.join(temp_dir, filename)
+        # Download the file
+        response = requests.get(url, stream=True)
+        response.raise_for_status()
+        # Save the file
+        with open(filepath, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        return f"File downloaded to {filepath}. You can read this file to process its contents."
+    except Exception as e:
+        return f"Error downloading file: {str(e)}"
+@tool
+def extract_text_from_image(image_path: str) -> str:
+    """
+    Extract text from an image using OCR library pytesseract (if available).
+    Args:
+        image_path (str): the path to the image file.
+    """
+    try:
+        # Open the image
+        image = Image.open(image_path)
+        # Extract text from the image
+        text = pytesseract.image_to_string(image)
+        return f"Extracted text from image:\n\n{text}"
+    except Exception as e:
+        return f"Error extracting text from image: {str(e)}"
+@tool
+def analyze_csv_file(file_path: str, query: Optional[str] = None) -> str:
+    """
+    Analyze a CSV file using pandas and answer a question about it.
+    Args:
+        file_path (str): the path to the CSV file.
+        query (str): Question about the data
+    """
+    if not os.path.isfile(file_path) or not file_path.endswith((".csv")):
+            return "Invalid or missing csv file."
+    try :
+        df = pd.read_csv(file_path)
+        columns = df.columns
+        result = [f"CSV loaded with shape: {df.shape}", f" Columns: {', '.join(columns)}"]
+        if query:
+            result.append(f"\n Query: {query}")
+            result_df = psql.sqldf(query, {"df": df})
+            result.append("Query Result:\n" + result_df.to_string(index=False))
+        else:
+            result.append("\nSummary:\n" + str(df.describe(include='all')))
+        return "\n".join(result)
+    except Exception as e:
+        return f"Error analyzing CSV file: {str(e)}"
+@tool
+def analyze_excel_file(file_path: str, query: Optional[str] = None) -> str:
+    """
+    Analyze a excel file using pandas and answer a question about it.
+    Args:
+        file_path (str): the path to the xls or xlsx file.
+        query (str): Question about the data
+    """
+    if not os.path.isfile(file_path) or not file_path.endswith((".xls", ".xlsx")):
+            return "Invalid or missing Excel file."
+    try :
+        df = pd.read_excel(file_path)
+        columns = df.columns
+        result = [f"CSV loaded with shape: {df.shape}", f" Columns: {', '.join(columns)}"]
+        if query:
+            result.append(f"\n Query: {query}")
+            result_df = psql.sqldf(query, {"df": df})
+            result.append("Query Result:\n" + result_df.to_string(index=False))
+        else:
+            result.append("\nSummary:\n" + str(df.describe(include='all')))
+        return "\n".join(result)
+    except Exception as e:
+        return f"Error analyzing Excel file: {str(e)}"