ymcmy committed on
Commit
f438d4c
·
verified ·
1 Parent(s): 1aec19a

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +91 -0
utils.py CHANGED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ import requests
3
+ import numpy as np
4
+ import pdfkit
5
+ from datetime import datetime
6
+
7
def _truncated_normal_int(rng, mean, std, low, high):
    """Draw a rounded normal sample, retrying until it falls in [low, high]."""
    while True:
        value = int(np.round(rng.normal(mean, std)))
        if low <= value <= high:
            return value


def gen_link():
    """Return the URL of a randomly chosen AoPS wiki problem page.

    With equal probability the link points to either:

    * an AMC 12 problem: year 2015-2022, contest A or B, problem number
      drawn from a normal distribution (mean 18, std 5) restricted to
      10..25; or
    * an AIME problem: year 2005-2022, contest I or II, problem number
      drawn from a normal distribution (mean 6, std 4) restricted to 1..15.

    Returns:
        str: the wiki URL. The page is not checked for existence here.
    """
    # A private generator gives fresh entropy on every call without
    # reseeding the global NumPy RNG, so a seed set by the caller survives.
    rng = np.random.default_rng()

    if rng.random() < 0.5:
        # AMC 12 (upper bound of integers() is exclusive, so 2015..2022)
        year = int(rng.integers(2015, 2023))
        contest = str(rng.choice(['A', 'B']))
        q = _truncated_normal_int(rng, 18, 5, 10, 25)
        return 'https://artofproblemsolving.com/wiki/index.php/{}_AMC_12{}_Problems/Problem_{}'.format(year, contest, q)

    # AIME (upper bound exclusive, so 2005..2022)
    year = int(rng.integers(2005, 2023))
    contest = str(rng.choice(['I', 'II']))
    q = _truncated_normal_int(rng, 6, 4, 1, 15)
    return 'https://artofproblemsolving.com/wiki/index.php/{}_AIME_{}_Problems/Problem_{}'.format(year, contest, q)
34
+
35
def convert_to_renderable_html(text):
    """Make protocol-relative AoPS LaTeX image URLs absolute.

    Every occurrence of ``//latex.artofproblemsolving.com`` that is not
    already preceded by a scheme (i.e. not directly after a ``:``) is
    rewritten to ``https://latex.artofproblemsolving.com``.

    Args:
        text: HTML fragment as a string.

    Returns:
        str: the fragment with the URLs rewritten; URLs that already carry
        a scheme are left untouched.
    """
    # Local import: this function is the only user of ``re``.
    import re

    # The negative lookbehind prevents "https://latex..." from turning
    # into "https:https://latex...", which a plain str.replace would do.
    return re.sub(
        r'(?<!:)//latex\.artofproblemsolving\.com',
        'https://latex.artofproblemsolving.com',
        text,
    )
38
+
39
def get_problem(url, timeout=10):
    """Fetch a wiki page and return the HTML of its "Problem" section.

    The page is downloaded with a browser-like User-Agent, the
    ``<span class="mw-headline" id="Problem">`` headline is located, and
    every ``<p>`` element that follows the headline's parent is collected
    until the next ``<h2>`` is reached.

    Args:
        url: address of the problem page.
        timeout: seconds to wait for the server before giving up; without
            it a stalled connection would block forever.

    Returns:
        str | None: the paragraphs joined by single spaces (with LaTeX image
        URLs made absolute), or ``None`` when the page has no "Problem"
        headline.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})

    if problem_headline is None:
        print("No problem found")
        return None

    paragraphs = []
    for sibling in problem_headline.parent.find_next_siblings():
        # An 'h2' marks the start of the next section: stop collecting.
        if sibling.name == 'h2':
            break
        # Only paragraphs belong to the problem statement.
        if sibling.name == 'p':
            paragraphs.append(convert_to_renderable_html(str(sibling)))

    # Join all paragraphs into a single HTML string.
    return " ".join(paragraphs)
62
+
63
def gen_html(num, max_tries=21):
    """Build an HTML page containing up to ``num`` random problems.

    Random links are generated with ``gen_link`` and fetched with
    ``get_problem``. Each problem that is fetched successfully is added to
    the page, preceded by a "to link" anchor pointing at its source.
    Failed attempts (network errors, pages without a problem section) are
    skipped.

    Args:
        num: number of problems wanted. Zero or a negative value yields an
            empty page without any network access.
        max_tries: upper bound on fetch attempts, successful or not. The
            default of 21 matches the historical limit.

    Returns:
        str: a complete ``<html>`` document. It may hold fewer than ``num``
        problems if ``max_tries`` is exhausted first.
    """
    sections = []
    tries = 0
    while len(sections) < num and tries < max_tries:
        tries += 1
        try:
            link = gen_link()
            print(link)
            qhtml = get_problem(link)
        except Exception as exc:
            # Best effort: a failed fetch must not abort the whole page, but
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            print("Skipping problem: {}".format(exc))
            continue
        if qhtml is None:
            # The page exists but has no "Problem" section.
            continue
        hype = '<a href="{}" target="_blank">to link</a>'.format(link)
        sections.append(hype + qhtml)

    all_q = "".join(sections)
    return f'''
<html>
<head>
</head>
<body>
{all_q}
</body>
</html>
'''