|
|
from bs4 import BeautifulSoup |
|
|
import requests |
|
|
import numpy as np |
|
|
from datetime import datetime |
|
|
|
|
|
def _draw_problem_number(mean, spread, lowest, highest):
    """Draw one problem number from a rounded, range-limited normal distribution.

    A value is sampled from a normal distribution with the given ``mean`` and
    standard deviation ``spread`` and rounded to the nearest integer; draws
    outside ``[lowest, highest]`` are discarded and retried.  Picking one
    element uniformly from a large pre-filtered batch of samples follows the
    same distribution, so this is a cheaper way to get the same result.

    Args:
        mean: Centre of the normal distribution.
        spread: Standard deviation of the normal distribution.
        lowest: Smallest acceptable problem number (inclusive).
        highest: Largest acceptable problem number (inclusive).

    Returns:
        A Python ``int`` between ``lowest`` and ``highest`` inclusive.
    """
    while True:
        candidate = int(np.round(np.random.normal(mean, spread)))
        if lowest <= candidate <= highest:
            return candidate


def gen_link():
    """Return the AoPS wiki URL of a randomly selected contest problem.

    With equal probability the link points to either:

    * an AMC 12A/12B problem from 2015-2022, numbered 10-25 (numbers
      concentrated around 18), or
    * an AIME I/II problem from 2005-2022, numbered 1-15 (numbers
      concentrated around 6).

    All randomness comes from NumPy's global generator, and the function does
    not reseed it, so callers can make the output reproducible with
    ``np.random.seed(...)``.

    Returns:
        The problem page URL as a string.  The page is not checked for
        existence here.
    """
    if np.random.choice([True, False]):
        # randint's upper bound is exclusive, so this yields 2015..2022.
        year = np.random.randint(2015, 2023)
        variant = np.random.choice(['A', 'B'])
        number = _draw_problem_number(18, 5, 10, 25)
        return 'https://artofproblemsolving.com/wiki/index.php/{}_AMC_12{}_Problems/Problem_{}'.format(year, variant, number)

    year = np.random.randint(2005, 2023)
    variant = np.random.choice(['I', 'II'])
    number = _draw_problem_number(6, 4, 1, 15)
    return 'https://artofproblemsolving.com/wiki/index.php/{}_AIME_{}_Problems/Problem_{}'.format(year, variant, number)
|
|
|
|
|
def convert_to_renderable_html(text):
    """Rewrite protocol-relative AoPS LaTeX image URLs to absolute HTTPS URLs.

    References of the form ``//latex.artofproblemsolving.com/...`` only
    resolve when the HTML is served over http(s); prefixing the scheme makes
    them work when the markup is rendered elsewhere.

    URLs that already carry a scheme (``https://latex...`` or
    ``http://latex...``) are left untouched, so applying the function more
    than once is harmless.

    Args:
        text: HTML markup as a string.

    Returns:
        The markup with every scheme-less LaTeX host reference prefixed with
        ``https:``.
    """
    import re  # function-scope import keeps this block self-contained

    # The lookbehind skips "//latex..." occurrences that directly follow a
    # colon, i.e. the tail of an already-absolute URL.
    return re.sub(
        r'(?<!:)//latex\.artofproblemsolving\.com',
        'https://latex.artofproblemsolving.com',
        text,
    )
|
|
|
|
|
def get_problem(url, timeout=10):
    """Download an AoPS wiki problem page and extract the problem statement.

    The statement is taken to be every ``<p>`` element that follows the
    heading containing ``<span class="mw-headline" id="Problem">``, up to
    (but not including) the next ``<h2>``.

    Args:
        url: Address of the wiki page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        The collected paragraphs as HTML, joined with single spaces and with
        LaTeX image URLs made absolute, or ``None`` (after printing a notice)
        when the page has no "Problem" heading.  The string may be empty if
        the heading is not followed by any paragraph.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Browser-like User-Agent; presumably the site rejects the default
    # python-requests agent -- confirm before changing.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})

    if problem_headline is None:
        print("No problem found")
        return None

    problem_content = []
    # The headline span sits inside the heading element, so the statement
    # paragraphs are siblings of the span's parent.
    for sibling in problem_headline.parent.find_next_siblings():
        if sibling.name == 'h2':
            # The next section heading marks the end of the statement.
            break
        if sibling.name == 'p':
            problem_content.append(convert_to_renderable_html(str(sibling)))

    return " ".join(problem_content)
|
|
|
|
|
def gen_html(num, max_attempts=21):
    """Build an HTML page containing up to ``num`` randomly chosen problems.

    Random problem links are generated and fetched one at a time until
    ``num`` problems have been collected or ``max_attempts`` lookups have
    been made, whichever comes first.  Each collected problem is preceded by
    a "to link" anchor that opens the source page in a new tab.  Every
    generated link is printed as it is tried.

    Args:
        num: Number of problems wanted.  Zero or a negative value yields a
            page with an empty body and performs no lookups.
        max_attempts: Upper bound on lookups, successful or not.  The default
            of 21 matches the historical cut-off (stop once more than 20
            attempts have been made).

    Returns:
        A complete HTML document as a string.  It may contain fewer than
        ``num`` problems if too many lookups failed.
    """
    sections = []
    attempts = 0
    while len(sections) < num and attempts < max_attempts:
        attempts += 1
        try:
            link = gen_link()
            print(link)
            qhtml = get_problem(link)
        except Exception as err:
            # Best effort: a failed lookup only costs one attempt.  Catching
            # Exception (not everything) lets Ctrl-C and SystemExit through.
            print("Skipping problem: {}".format(err))
            continue

        if qhtml is None:
            # The page exists but has no problem section; try another link.
            continue

        hype = '<a href="{}" target="_blank">to link</a>'.format(link)
        sections.append(hype + qhtml)

    body = "".join(sections)
    return "\n".join([
        "",
        "<html>",
        "<head>",
        "</head>",
        "<body>",
        body,
        "</body>",
        "</html>",
        "",
    ])