Final_Assignment_Template

Sleeping

App Files Files Community

WeByT3 committed on Jun 4, 2025

Commit

a1c4a3e

verified ·

1 Parent(s): 8644d6e

Update tools.py

Browse files

Files changed (1) hide show

tools.py +27 -8

tools.py CHANGED Viewed

@@ -2,6 +2,9 @@ from langchain_core.tools import tool
 import wikipediaapi
 import pandas as pd
 import requests
 @tool
@@ -80,12 +83,12 @@ def search_wikipedia(page_title: str, language: str) -> str:
         for i, table in enumerate(tables):
             if isinstance(table, pd.DataFrame):
-                markdown = table.head(10).to_markdown(index=False)
                 markdown_tables.append(f"\n---\n**Table {i + 1}:**\n{markdown}")
         table_output = "\n".join(markdown_tables) if markdown_tables else "No tables found on this page."
-        return f"Title: {page.title}\n\nText: {page.summary[:100]}\n\n{table_output}"
     except Exception as e:
         return f"Error retrieving Wikipedia content: {str(e)}"
@@ -127,10 +130,8 @@ def duckduckgo_search(query: str) -> str:
 @tool
 def search_papers(query: str) -> str:
-    """Search for academic papers using Semantic Scholar.
-        Args:
-            query: The query to search the papers in Semantic Scholar
-    """
     url = "https://api.semanticscholar.org/graph/v1/paper/search"
     params = {
         "query": query,
@@ -146,6 +147,7 @@ def search_papers(query: str) -> str:
             return "No papers found."
         results = []
         for paper in data["data"]:
             title = paper.get("title", "No title")
             authors = ", ".join([a.get("name", "") for a in paper.get("authors", [])])
@@ -153,10 +155,27 @@ def search_papers(query: str) -> str:
             abstract = paper.get("abstract", "No abstract available.")
             link = paper.get("url", "")
-            result = f"**{title}** ({year}) by {authors}\n{abstract}\nLink: {link}"
             results.append(result)
-        return "\n\n".join(results)
     except Exception as e:
         return f"Error fetching papers: {e}"

 import wikipediaapi
 import pandas as pd
 import requests
+import fitz  # PyMuPDF
+import io
+from urllib.parse import urlparse
 @tool
         for i, table in enumerate(tables):
             if isinstance(table, pd.DataFrame):
+                markdown = table.iloc[:10, :5].to_markdown(index=False)
                 markdown_tables.append(f"\n---\n**Table {i + 1}:**\n{markdown}")
         table_output = "\n".join(markdown_tables) if markdown_tables else "No tables found on this page."
+        return f"Text: {page.summary[:75]}\n\n{table_output}"
     except Exception as e:
         return f"Error retrieving Wikipedia content: {str(e)}"
 @tool
 def search_papers(query: str) -> str:
+    """Search for academic papers and retrieve their content when possible."""
     url = "https://api.semanticscholar.org/graph/v1/paper/search"
     params = {
         "query": query,
             return "No papers found."
         results = []
         for paper in data["data"]:
             title = paper.get("title", "No title")
             authors = ", ".join([a.get("name", "") for a in paper.get("authors", [])])
             abstract = paper.get("abstract", "No abstract available.")
             link = paper.get("url", "")
+            full_text = "Full text not available."
+            # Attempt to download and parse PDF (for arXiv)
+            if "arxiv.org" in link:
+                pdf_url = link.replace("abs", "pdf") + ".pdf"
+                try:
+                    pdf_response = requests.get(pdf_url)
+                    doc = fitz.open(stream=pdf_response.content, filetype="pdf")
+                    full_text = "\n".join(page.get_text() for page in doc[:3])  # Only first 3 pages
+                    doc.close()
+                except Exception as pdf_err:
+                    full_text = f"Failed to retrieve full text: {pdf_err}"
+            result = f"""**{title}** ({year}) by {authors}
+                        Abstract: {abstract}
+                        Link: {link}
+                        Full Text (first pages):\n{full_text}"""
             results.append(result)
+        return "\n\n---\n\n".join(results)
     except Exception as e:
         return f"Error fetching papers: {e}"