Spaces:

blade57
/

Top_HF_Paper_Summary

Runtime error

App Files Files Community

blade57 committed on Apr 7, 2025

Commit

06ad973

verified ·

1 Parent(s): a4e190b

Create app.py

Browse files

Files changed (1) hide show

app.py +86 -0

app.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import json
+import os
+import requests
+from bs4 import BeautifulSoup
+from huggingface_hub import HfApi
+from pypdf import PdfReader
+from smolagents import CodeAgent, GradioUI, HfApiModel, tool
+@tool
+def get_hugging_face_top_daily_paper() -> str:
+  """
+  This is a tool that returns the most upvoted paper on Hugging Face daily papers.
+  It returns the title of the paper
+  """
+  try:
+    url = "<https://huggingface.co/papers>"
+    response = requests.get(url)
+    response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
+    soup = BeautifulSoup(response.content, "html.parser")
+    # Extract the title element from the JSON-like data in the "data-props" attribute
+    containers = soup.find_all('div', class_='SVELTE_HYDRATER contents')
+    top_paper = ""
+    for container in containers:
+      data_props = container.get('data-props', '')
+      if data_props:
+        try:
+          # Parse the JSON-like string
+          json_data = json.loads(data_props.replace('&quot;', '"'))
+          if 'dailyPapers' in json_data:
+            top_paper = json_data['dailyPapers'][0]['title']
+        except json.JSONDecodeError:
+          continue
+    return top_paper
+  except requests.exceptions.RequestException as e:
+    print(f"Error occurred while fetching the HTML: {e}")
+    return None
+@tool
+def get_paper_id_by_title(title: str) -> str:
+  """
+  This is a tool that returns the arxiv paper id by its title.
+  It returns the title of the paper
+  Args:
+    title: The paper title for which to get the id.
+  """
+  api = HfApi()
+  papers = api.list_papers(query=title)
+  if papers:
+    paper = next(iter(papers))
+    return paper.id
+  else:
+    return None
+@tool
+def read_pdf_file(file_path: str) -> str:
+  """
+  This function reads the first three pages of a PDF file and returns its content as a string.
+  Args:
+    file_path: The path to the PDF file.
+  Returns:
+    A string containing the content of the PDF file.
+  """
+  content = ""
+  reader = PdfReader('paper.pdf')
+  print(len(reader.pages))
+  pages = reader.pages[:3]
+  for page in pages:
+    content += page.extract_text()
+  return content
+model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
+model = HfApiModel(model_id=model_id, token=HF_TOKEN)
+agent = CodeAgent(tools=[get_hugging_face_top_daily_paper,
+                         get_paper_id_by_title,
+                         download_paper_by_id,
+                         read_pdf_file],
+                  model=model,
+                  add_base_tools=True)
+if __name__ == "__main__":
+    GradioUI(agent).launch()