File size: 4,478 Bytes
f438d4c
 
 
 
cfd35b8
 
 
6f71715
cfd35b8
f438d4c
 
86f48c4
598ca45
f438d4c
598ca45
 
 
 
 
f438d4c
598ca45
 
 
f438d4c
598ca45
f438d4c
598ca45
 
 
 
f438d4c
598ca45
 
 
f438d4c
 
 
 
 
 
 
 
598ca45
f438d4c
 
 
 
598ca45
f438d4c
 
 
 
 
 
 
 
 
 
 
 
 
 
598ca45
 
 
 
f438d4c
598ca45
f438d4c
598ca45
 
 
 
 
 
f438d4c
598ca45
 
f438d4c
 
598ca45
f438d4c
 
598ca45
 
 
 
 
86f48c4
598ca45
 
 
f438d4c
 
598ca45
f438d4c
 
 
86f48c4
6f71715
cfd35b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
from bs4 import BeautifulSoup
import requests
import numpy as np
from datetime import datetime
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
import io
from PIL import Image

def gen_link():
    """Return the AoPS wiki URL of a randomly selected contest problem.

    A coin flip picks between an AMC 12 problem and an AIME problem.
    NumPy's global generator is then re-seeded (``np.random.seed()`` with
    no argument) before the year, contest variant and problem number are
    drawn. Problem numbers come from a rounded normal sample restricted
    to the contest's accepted range, so numbers near the mean are favored.

    Returns:
        str: A URL under ``https://artofproblemsolving.com/wiki/index.php/``.
    """

    def _draw_question(center, spread, lowest, highest):
        # Sample 1000 rounded normal values, keep those inside the inclusive
        # [lowest, highest] window, and pick one of the survivors uniformly.
        samples = np.round(np.random.normal(center, spread, 1000))
        in_window = samples[(samples >= lowest) & (samples <= highest)]
        return int(np.random.choice(in_window))

    is_amc = np.random.choice([True, False])
    np.random.seed()

    if not is_amc:
        # AIME: years 2005-2022 (randint's upper bound is exclusive).
        year = np.random.randint(2005, 2023)
        I = np.random.choice(['I', 'II'])
        q = _draw_question(6, 4, 1, 15)
        return f'https://artofproblemsolving.com/wiki/index.php/{year}_AIME_{I}_Problems/Problem_{q}'

    # AMC 12: years 2015-2022, problems 10 through 25.
    year = np.random.randint(2015, 2023)
    AB = np.random.choice(['A', 'B'])
    q = _draw_question(18, 5, 10, 25)
    return f'https://artofproblemsolving.com/wiki/index.php/{year}_AMC_12{AB}_Problems/Problem_{q}'

def convert_to_renderable_html(text):
    """Make AoPS LaTeX image URLs in ``text`` absolute HTTPS URLs.

    Protocol-relative references (``//latex.artofproblemsolving.com/...``)
    are rewritten to ``https://latex.artofproblemsolving.com/...``.
    References that already carry a scheme (i.e. are preceded by ``:``)
    are left untouched, so calling this function repeatedly is safe.

    Args:
        text: HTML markup as a string.

    Returns:
        str: The markup with protocol-relative LaTeX image URLs made absolute.
    """
    import re  # local import: only this helper needs it

    # The negative lookbehind skips "https://latex..." / "http://latex...",
    # which a plain str.replace would corrupt into "https:https://latex...".
    return re.sub(
        r'(?<!:)//latex\.artofproblemsolving\.com',
        'https://latex.artofproblemsolving.com',
        text,
    )

def get_problem(url):
    """Download a wiki page and extract the HTML of its "Problem" section.

    The page is searched for a ``<span class="mw-headline" id="Problem">``.
    Every ``<p>`` sibling that follows the headline's parent element, up to
    (but not including) the next ``<h2>``, is collected, passed through
    ``convert_to_renderable_html`` and joined with single spaces.

    Args:
        url: Address of the problem page to fetch.

    Returns:
        str | None: The problem statement markup, or ``None`` (after printing
        "No problem found") when the page has no "Problem" headline.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=15)
    soup = BeautifulSoup(response.text, 'html.parser')
    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})

    if not problem_headline:
        print("No problem found")
        return None

    problem_content = []
    for sibling in problem_headline.parent.find_next_siblings():
        if sibling.name == 'h2':
            # The next section heading marks the end of the problem statement.
            break
        if sibling.name == 'p':
            problem_content.append(convert_to_renderable_html(str(sibling)))

    return " ".join(problem_content)

def gen_html(num):
    """Build an HTML document containing up to ``num`` random problems.

    Random problem links are generated with ``gen_link`` and fetched with
    ``get_problem`` until ``num`` problems have been collected or 21
    attempts have been made, whichever comes first. Each collected problem
    is emitted as a "to link" anchor immediately followed by the problem
    markup. Attempts that raise, or for which no problem is found, are
    counted and skipped.

    Args:
        num: Number of problems wanted. Values <= 0 yield an empty body
            without performing any fetch.

    Returns:
        str: A complete HTML page (with inline CSS) wrapping the problems.
    """
    max_tries = 21
    collected = []
    num_tried = 0

    while len(collected) < num and num_tried < max_tries:
        num_tried += 1
        try:
            link = gen_link()
            print(link)
            qhtml = get_problem(link)
        except Exception as e:
            # Best effort: a failed fetch just costs one attempt.
            print(f"Error: {e}")
            continue

        if qhtml is None:
            # The page had no "Problem" section; try another link.
            continue

        hype = f'<a href="{link}" target="_blank">to link</a>'
        collected.append(hype + qhtml)

    all_q = "".join(collected)
    all_q = f'''
    <html>
    <head>
        <style>
            body {{
                font-family: Arial, sans-serif;
                font-size: 12pt;
            }}
            img.latex {{
                font-size: 12pt; /* Ensure math font size matches the body text size */
            }}
        </style>
    </head>
    <body>
        {all_q}
    </body>
    </html>
    '''
    return all_q

def generate_pdf_content(html_content):
    """Flatten HTML into an ordered list of drawable elements.

    All ``<p>``, ``<a>`` and ``<img>`` tags are visited in the order
    ``BeautifulSoup.find_all`` returns them and converted to tuples:

    * ``('text', text)`` for a ``<p>`` (its text content),
    * ``('link', href, text)`` for an ``<a>``,
    * ``('image', PIL.Image)`` for an ``<img>`` whose ``src`` was downloaded.

    Args:
        html_content: HTML markup as a string.

    Returns:
        list[tuple]: The extracted elements.

    Raises:
        requests.RequestException: If an image download fails or times out.
    """
    # Use BeautifulSoup to parse the HTML content
    soup = BeautifulSoup(html_content, 'html.parser')

    # Extract text and images
    elements = []
    for tag in soup.find_all(['p', 'a', 'img']):
        if tag.name == 'p':
            elements.append(('text', tag.get_text()))
        elif tag.name == 'a':
            elements.append(('link', tag.get('href'), tag.get_text()))
        elif tag.name == 'img':
            img_url = tag.get('src')
            if not img_url:
                # Nothing to download for an <img> without a source.
                continue
            if img_url.startswith('//'):
                # requests rejects protocol-relative URLs; assume HTTPS.
                img_url = 'https:' + img_url
            # Bound the download so a stalled server cannot hang the export.
            response = requests.get(img_url, timeout=15)
            img = Image.open(io.BytesIO(response.content))
            elements.append(('image', img))

    return elements

def create_pdf(filename, content):
    """Write the given elements to a letter-size PDF, top to bottom.

    Args:
        filename: Path of the PDF file to create.
        content: Iterable of tuples whose first item is the element kind:
            ``('text', text)``, ``('link', href, text)`` or
            ``('image', image)`` where ``image`` exposes ``.size`` as
            ``(width, height)`` and is accepted by ``ImageReader``.
            Elements of any other kind are ignored.

    Text and links are drawn as single lines 20 points apart. Images are
    drawn at their pixel size in points, shrunk proportionally when they
    exceed the printable area. A new page is started *before* drawing an
    element that would not fit above the bottom margin, so nothing is
    drawn past the bottom edge of the page.
    """
    left = 30
    line_step = 20
    bottom_limit = 50

    c = canvas.Canvas(filename, pagesize=letter)
    width, height = letter
    top_y = height - 40
    max_img_w = width - 2 * left
    max_img_h = top_y - bottom_limit
    y = top_y

    for elem in content:
        kind = elem[0]

        if kind == 'image':
            img = elem[1]
            img_w, img_h = img.size
            # Shrink (never enlarge) so the image fits the printable area.
            scale = 1.0
            if img_w > 0 and img_h > 0:
                scale = min(1.0, max_img_w / img_w, max_img_h / img_h)
            draw_w = img_w * scale
            draw_h = img_h * scale

            # Break the page first if the image would cross the bottom margin;
            # skip the break when already at the top to avoid blank pages.
            if y - draw_h < bottom_limit and y < top_y:
                c.showPage()
                y = top_y

            c.drawImage(ImageReader(img), left, y - draw_h, width=draw_w, height=draw_h)
            y -= draw_h + line_step
            continue

        if kind == 'text':
            line = elem[1]
        elif kind == 'link':
            line = f'{elem[2]}: {elem[1]}'
        else:
            continue

        if y < bottom_limit:
            c.showPage()
            y = top_y
        c.drawString(left, y, line)
        y -= line_step

    c.save()

def convert_html_to_pdf(html_content, output_filename):
    """Convert an HTML string into a PDF file.

    The markup is first broken into drawable elements by
    ``generate_pdf_content``; those elements are then written to
    ``output_filename`` by ``create_pdf``.

    Args:
        html_content: HTML markup as a string.
        output_filename: Path of the PDF file to write.
    """
    create_pdf(output_filename, generate_pdf_content(html_content))