blade57 committed on
Commit
06ad973
·
verified ·
1 Parent(s): a4e190b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os

import requests
from bs4 import BeautifulSoup
from huggingface_hub import HfApi
from pypdf import PdfReader
from smolagents import tool
from smolagents import CodeAgent, HfApiModel
from smolagents import GradioUI
8
+
9
@tool
def get_hugging_face_top_daily_paper() -> str:
    """
    This is a tool that returns the top paper on Hugging Face daily papers,
    i.e. the first entry of the paper list embedded in the daily papers page.
    It returns the title of the paper. An empty string is returned when no paper
    list is found on the page, and None is returned when the page cannot be fetched.
    """
    try:
        # Must be a bare URL: wrapping it in "<...>" (markdown autolink syntax)
        # makes requests reject it, so the tool could never succeed.
        url = "https://huggingface.co/papers"
        # A timeout keeps the agent from hanging forever on a stalled connection;
        # timeouts are RequestException subclasses and are handled below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        soup = BeautifulSoup(response.content, "html.parser")
    except requests.exceptions.RequestException as e:
        print(f"Error occurred while fetching the HTML: {e}")
        return None

    # The paper list lives in the JSON payload of a "data-props" attribute.
    top_paper = ""
    containers = soup.find_all('div', class_='SVELTE_HYDRATER contents')
    for container in containers:
        data_props = container.get('data-props', '')
        if not data_props:
            continue
        try:
            json_data = json.loads(data_props.replace('&quot;', '"'))
        except json.JSONDecodeError:
            # Not every hydrater container carries valid JSON; skip those.
            continue
        if not isinstance(json_data, dict):
            continue
        daily_papers = json_data.get('dailyPapers')
        # Skip a missing or empty list instead of raising IndexError on [0].
        if daily_papers:
            top_paper = daily_papers[0]['title']

    return top_paper
40
+
41
@tool
def get_paper_id_by_title(title: str) -> str:
    """
    This is a tool that returns the arxiv paper id by its title.
    It returns the id of the first paper matching the title, or None when no paper matches.

    Args:
        title: The paper title for which to get the id.
    """
    api = HfApi()
    # list_papers produces its results lazily, so the returned iterable is truthy
    # even when empty; request the first item with a default rather than testing
    # the iterable and letting next() raise StopIteration.
    first_match = next(iter(api.list_papers(query=title)), None)
    if first_match is None:
        return None
    return first_match.id
57
+
58
@tool
def read_pdf_file(file_path: str) -> str:
    """
    This function reads the first three pages of a PDF file and returns its content as a string.
    Args:
        file_path: The path to the PDF file.
    Returns:
        A string containing the content of the PDF file.
    """
    # Open the file the caller asked for rather than a fixed file name.
    reader = PdfReader(file_path)
    print(len(reader.pages))
    # Pages without extractable text contribute an empty string.
    return "".join(page.extract_text() or "" for page in reader.pages[:3])
74
+
75
@tool
def download_paper_by_id(paper_id: str) -> str:
    """
    This tool gets the id of a paper and downloads it from arxiv.
    It saves the paper locally in the current directory as "paper.pdf" and returns that file path.

    Args:
        paper_id: The arxiv id of the paper to download.
    """
    response = requests.get(f"https://arxiv.org/pdf/{paper_id}", timeout=60)
    response.raise_for_status()
    with open("paper.pdf", "wb") as pdf_file:
        pdf_file.write(response.content)
    return "paper.pdf"


model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"

# Read the access token from the environment instead of relying on an undefined
# name. When the variable is unset, None is passed through.
# NOTE(review): presumably the client then falls back to locally stored
# credentials - confirm against the smolagents/huggingface_hub version in use.
HF_TOKEN = os.environ.get("HF_TOKEN")

model = HfApiModel(model_id=model_id, token=HF_TOKEN)
agent = CodeAgent(
    tools=[
        get_hugging_face_top_daily_paper,
        get_paper_id_by_title,
        download_paper_by_id,
        read_pdf_file,
    ],
    model=model,
    add_base_tools=True,
)

if __name__ == "__main__":
    # Serve the agent through the Gradio chat interface.
    GradioUI(agent).launch()