Update utils.py
Browse files
utils.py
CHANGED
|
@@ -1,35 +1,32 @@
|
|
| 1 |
from bs4 import BeautifulSoup
|
| 2 |
import requests
|
| 3 |
import numpy as np
|
| 4 |
-
import pdfkit
|
| 5 |
from datetime import datetime
|
| 6 |
|
| 7 |
def gen_link():
|
| 8 |
-
if
|
| 9 |
-
#
|
| 10 |
np.random.seed()
|
| 11 |
-
year=np.random.randint(2015,2023)
|
| 12 |
-
AB=np.random.choice(['A', 'B'])
|
| 13 |
-
#
|
| 14 |
-
mu,sigma=18, 5
|
| 15 |
-
s=np.random.normal(mu,sigma,1000)
|
| 16 |
s = np.round(s)
|
| 17 |
-
s=s[s>=10]
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
link='https://artofproblemsolving.com/wiki/index.php/{}_AMC_12{}_Problems/Problem_{}'.format(year, AB, q)
|
| 21 |
else:
|
| 22 |
-
#
|
| 23 |
np.random.seed()
|
| 24 |
-
year=np.random.randint(2005,2023)
|
| 25 |
-
I=np.random.choice(['I', 'II'])
|
| 26 |
-
mu,sigma=6, 4
|
| 27 |
-
s=np.random.normal(mu,sigma,1000)
|
| 28 |
s = np.round(s)
|
| 29 |
-
s=s[s>=1]
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
link='https://artofproblemsolving.com/wiki/index.php/{}_AIME_{}_Problems/Problem_{}'.format(year,I, q)
|
| 33 |
return link
|
| 34 |
|
| 35 |
def convert_to_renderable_html(text):
|
|
@@ -38,54 +35,71 @@ def convert_to_renderable_html(text):
|
|
| 38 |
|
| 39 |
def get_problem(url):
|
| 40 |
headers = {
|
| 41 |
-
|
| 42 |
}
|
| 43 |
response = requests.get(url, headers=headers)
|
| 44 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 45 |
problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})
|
| 46 |
-
|
| 47 |
if problem_headline:
|
| 48 |
problem_content = []
|
| 49 |
for sibling in problem_headline.parent.find_next_siblings():
|
| 50 |
-
# If the sibling is a headline ('h2'), break the loop, as we've reached the next section
|
| 51 |
if sibling.name == 'h2':
|
| 52 |
break
|
| 53 |
-
# If the sibling is a paragraph ('p'), add it to the problem content
|
| 54 |
elif sibling.name == 'p':
|
| 55 |
problem_content.append(convert_to_renderable_html(str(sibling)))
|
| 56 |
|
| 57 |
-
# Join all paragraphs into a single string (HTML)
|
| 58 |
problem_html = " ".join(problem_content)
|
| 59 |
return problem_html
|
| 60 |
else:
|
| 61 |
print("No problem found")
|
| 62 |
|
| 63 |
def gen_html(num):
|
| 64 |
-
all_q=str()
|
| 65 |
-
num_tried=0
|
| 66 |
-
num_succ=0
|
| 67 |
-
while
|
| 68 |
try:
|
| 69 |
-
link=gen_link()
|
| 70 |
print(link)
|
| 71 |
-
hype = '<a href="{}" target="_blank">to link</a>'
|
| 72 |
-
qhtml=get_problem(link)
|
| 73 |
-
all_q+=(hype+qhtml)
|
| 74 |
-
num_succ+=1
|
| 75 |
-
except:
|
|
|
|
| 76 |
pass
|
| 77 |
-
num_tried+=1
|
| 78 |
-
if num_succ>=num or num_tried>20:
|
| 79 |
break
|
| 80 |
|
| 81 |
-
all_q=f'''
|
| 82 |
<html>
|
| 83 |
<head>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
</head>
|
| 85 |
<body>
|
| 86 |
-
|
| 87 |
</body>
|
| 88 |
</html>
|
| 89 |
'''
|
| 90 |
-
|
| 91 |
return all_q
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from bs4 import BeautifulSoup
|
| 2 |
import requests
|
| 3 |
import numpy as np
|
|
|
|
| 4 |
from datetime import datetime
|
| 5 |
|
| 6 |
def gen_link(rng=None):
    """Return the URL of a randomly selected AoPS wiki problem page.

    A fair coin decides between the two contests:

    * AMC 12: year 2015-2022, contest ``A`` or ``B``, problem 10-25.
    * AIME: year 2005-2022, contest ``I`` or ``II``, problem 1-15.

    The problem number is a normal draw rounded to the nearest integer and
    restricted to the valid range (mean 18, std 5 for AMC; mean 6, std 4 for
    AIME).

    Args:
        rng: Optional ``numpy.random.Generator`` used for every draw. When
            omitted, a fresh generator seeded from OS entropy is created, so
            calls stay independent of each other and the global ``np.random``
            state (and any seed the caller set on it) is left untouched.

    Returns:
        str: The URL of the chosen problem page.
    """
    if rng is None:
        rng = np.random.default_rng()

    base = 'https://artofproblemsolving.com/wiki/index.php'

    if rng.random() < 0.5:
        # AMC
        year = int(rng.integers(2015, 2023))  # upper bound is exclusive
        contest = str(rng.choice(['A', 'B']))
        q = _draw_problem_number(rng, mu=18, sigma=5, low=10, high=25)
        return f'{base}/{year}_AMC_12{contest}_Problems/Problem_{q}'

    # AIME
    year = int(rng.integers(2005, 2023))  # upper bound is exclusive
    contest = str(rng.choice(['I', 'II']))
    q = _draw_problem_number(rng, mu=6, sigma=4, low=1, high=15)
    return f'{base}/{year}_AIME_{contest}_Problems/Problem_{q}'


def _draw_problem_number(rng, mu, sigma, low, high):
    """Draw one rounded normal sample that lies in ``[low, high]``."""
    # Rejection sampling: same distribution as drawing a large batch,
    # filtering it to the range and picking one survivor, but without the
    # throw-away array and without the (theoretical) empty-batch failure.
    while True:
        number = int(np.round(rng.normal(mu, sigma)))
        if low <= number <= high:
            return number
|
| 31 |
|
| 32 |
def convert_to_renderable_html(text):
|
|
|
|
| 35 |
|
| 36 |
def get_problem(url, timeout=10):
    """Fetch a wiki problem page and return its problem statement as HTML.

    The page is searched for the ``<span class="mw-headline" id="Problem">``
    headline. Every ``<p>`` element that follows the headline's parent, up to
    the next ``<h2>``, is passed through ``convert_to_renderable_html`` and
    the results are joined with single spaces.

    Args:
        url: Address of the problem page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        str | None: The joined HTML of the problem paragraphs (possibly an
        empty string when no paragraph follows the headline), or ``None``
        when the page has no "Problem" headline.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            times out.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})

    if not problem_headline:
        print("No problem found")
        return None

    problem_content = []
    for sibling in problem_headline.parent.find_next_siblings():
        if sibling.name == 'h2':
            # The next section headline marks the end of the problem text.
            break
        if sibling.name == 'p':
            problem_content.append(convert_to_renderable_html(str(sibling)))

    return " ".join(problem_content)
|
| 56 |
|
| 57 |
def gen_html(num, max_attempts=21):
    """Build a standalone HTML page containing up to ``num`` random problems.

    Links are generated with ``gen_link`` and fetched with ``get_problem``
    until ``num`` problems have been collected or ``max_attempts`` links have
    been tried, whichever comes first. Each collected problem is preceded by
    a "to link" anchor pointing at its source page.

    Args:
        num: Number of problems wanted. Values of zero or less produce a page
            with an empty body and trigger no downloads.
        max_attempts: Upper bound on the number of links tried. The default
            keeps the historical cap of 21 attempts.

    Returns:
        str: The complete HTML document.
    """
    sections = []
    attempts = 0
    while len(sections) < num and attempts < max_attempts:
        attempts += 1
        link = gen_link()
        print(link)
        try:
            qhtml = get_problem(link)
        except Exception as e:
            # Best effort: a failed download or parse must not abort the
            # whole page, so report it and move on to another link.
            print(f"Error: {e}")
            continue
        if qhtml is None:
            # get_problem has already reported that the page has no problem.
            continue
        hype = f'<a href="{link}" target="_blank">to link</a>'
        sections.append(hype + qhtml)

    all_q = "".join(sections)
    return f'''
    <html>
    <head>
    <style>
    body {{
    font-family: Arial, sans-serif;
    font-size: 12pt;
    }}
    img.latex {{
    font-size: 12pt; /* Ensure math font size matches the body text size */
    }}
    a {{
    color: blue;
    text-decoration: none;
    }}
    a:hover {{
    text-decoration: underline;
    }}
    </style>
    </head>
    <body>
    {all_q}
    </body>
    </html>
    '''
|
| 102 |
+
|
| 103 |
+
def save_html_to_file(html_content, output_filename):
    """Write ``html_content`` to ``output_filename`` as UTF-8 text.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(output_filename, mode='w', encoding='utf-8') as out:
        out.write(html_content)
|