File size: 2,748 Bytes
f438d4c
 
 
bae9501
f438d4c
 
 
bae9501
 
f438d4c
bae9501
 
 
 
 
f438d4c
bae9501
 
 
 
f438d4c
bae9501
f438d4c
bae9501
 
 
 
f438d4c
bae9501
 
 
 
f438d4c
 
 
 
 
 
 
 
bae9501
f438d4c
 
 
 
bae9501
f438d4c
 
 
bae9501
f438d4c
 
bae9501
f438d4c
 
 
bae9501
f438d4c
 
 
 
 
 
bae9501
 
 
 
f438d4c
bae9501
f438d4c
bae9501
 
 
 
 
f438d4c
bae9501
 
f438d4c
 
bae9501
f438d4c
 
 
 
bae9501
f438d4c
 
 
bae9501
86f48c4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from bs4 import BeautifulSoup
import requests
import numpy as np
import pdfkit
from datetime import datetime

def gen_link():
    """Build a random Art of Problem Solving wiki URL for one contest problem.

    A coin flip selects between an AMC 12 problem (years 2015-2022, paper A
    or B, question 10-25) and an AIME problem (years 2005-2022, paper I or
    II, question 1-15). The question number is drawn from a rounded normal
    sample that is clipped to the valid range, so numbers near the mean
    (18 for AMC, 6 for AIME) come up most often.

    Returns:
        str: the URL of the selected problem page.
    """
    url_template = 'https://artofproblemsolving.com/wiki/index.php/{}_{}{}_Problems/Problem_{}'

    pick_amc = np.random.choice([True, False])
    # Both branches re-seed the global generator right after the coin flip.
    np.random.seed()

    if pick_amc:
        contest, papers = 'AMC_12', ['A', 'B']
        first_year = 2015
        mean, spread = 18, 5
        lowest, highest = 10, 25
    else:
        contest, papers = 'AIME_', ['I', 'II']
        first_year = 2005
        mean, spread = 6, 4
        lowest, highest = 1, 15

    # randint's upper bound is exclusive, so the latest possible year is 2022.
    year = np.random.randint(first_year, 2023)
    paper = np.random.choice(papers)

    # Draw 1000 rounded normal samples, keep the ones inside the valid
    # question range, and pick one of the survivors uniformly.
    draws = np.round(np.random.normal(mean, spread, 1000))
    candidates = draws[(draws >= lowest) & (draws <= highest)]
    question = int(np.random.choice(candidates))

    return url_template.format(year, contest, paper, question)

def convert_to_renderable_html(text):
    """Make protocol-relative LaTeX image URLs absolute so they load anywhere.

    Every ``//latex.artofproblemsolving.com`` occurrence that is not already
    preceded by a scheme (i.e. not directly after a ``:``) is rewritten to
    ``https://latex.artofproblemsolving.com``. URLs that already carry a
    scheme are left alone, so the function is safe to apply more than once.

    Args:
        text: HTML fragment as a string.

    Returns:
        The HTML fragment with the LaTeX host URLs made absolute.
    """
    # Local import: the module's import block does not bring in ``re``.
    import re

    # The lookbehind skips matches that sit inside "https://latex..." or
    # "http://latex...", which a plain str.replace would turn into
    # "https:https://latex...".
    return re.sub(
        r'(?<!:)//latex\.artofproblemsolving\.com',
        'https://latex.artofproblemsolving.com',
        text,
    )

def get_problem(url, timeout=10):
    """Download a problem page and return its problem statement as HTML.

    The page is searched for a ``<span class="mw-headline" id="Problem">``
    heading. Every ``<p>`` element that follows that heading's parent, up to
    (not including) the next ``<h2>``, is collected, passed through
    ``convert_to_renderable_html`` and joined with single spaces.

    Args:
        url: address of the page to fetch.
        timeout: seconds to wait for the server before giving up; forwarded
            to ``requests.get``.

    Returns:
        The joined HTML of the problem paragraphs (possibly an empty string
        when the section has no paragraphs), or ``None`` when the page has no
        "Problem" heading.

    Raises:
        requests.exceptions.RequestException: if the request fails or times
            out.
    """
    headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})

    if problem_headline is None:
        print("No problem found")
        return None

    problem_content = []
    for sibling in problem_headline.parent.find_next_siblings():
        # An 'h2' marks the start of the next section, so the problem text is over.
        if sibling.name == 'h2':
            break
        # Only paragraphs belong to the problem statement; other tags are skipped.
        if sibling.name == 'p':
            problem_content.append(convert_to_renderable_html(str(sibling)))

    # Join all paragraphs into a single HTML string.
    return " ".join(problem_content)

def gen_html(num, max_tries=21):
    """Assemble an HTML page containing up to ``num`` randomly chosen problems.

    Each attempt builds a link with ``gen_link``, prints it, and fetches the
    problem with ``get_problem``. A successful fetch contributes a
    "to link" anchor followed by the problem HTML. Attempts that raise, or
    for which ``get_problem`` returns ``None``, are skipped. The loop stops
    once ``num`` problems were collected or ``max_tries`` attempts were made.

    Args:
        num: number of problems wanted; zero or a negative value yields a
            page with an empty body and performs no fetches.
        max_tries: upper bound on fetch attempts (successful or not).

    Returns:
        str: a complete ``<html>`` document wrapping the collected problems.
    """
    sections = []
    tries = 0
    while len(sections) < num and tries < max_tries:
        tries += 1
        link = None
        try:
            link = gen_link()
            print(link)
            qhtml = get_problem(link)
        except Exception as err:
            # Best effort: a failed download or parse only costs one attempt.
            # Catching Exception (not everything) keeps Ctrl-C working.
            print('Skipping {}: {!r}'.format(link, err))
            continue
        if qhtml is None:
            # The page exists but has no problem section; try another link.
            continue
        hype = '<a href="{}" target="_blank">to link</a>'.format(link)
        sections.append(hype + qhtml)

    all_q = ''.join(sections)
    return f'''
    <html>
    <head>
    </head>
    <body>
    {all_q}
    </body>
    </html>
    '''