File size: 4,478 Bytes
f438d4c cfd35b8 6f71715 cfd35b8 f438d4c 86f48c4 598ca45 f438d4c 598ca45 f438d4c 598ca45 f438d4c 598ca45 f438d4c 598ca45 f438d4c 598ca45 f438d4c 598ca45 f438d4c 598ca45 f438d4c 598ca45 f438d4c 598ca45 f438d4c 598ca45 f438d4c 598ca45 f438d4c 598ca45 f438d4c 598ca45 86f48c4 598ca45 f438d4c 598ca45 f438d4c 86f48c4 6f71715 cfd35b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import io
import re
from datetime import datetime

import numpy as np
import requests
from bs4 import BeautifulSoup
from PIL import Image
from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import ImageReader, simpleSplit
from reportlab.pdfgen import canvas
def gen_link(rng=None):
    """Return the AoPS wiki URL of a randomly chosen AMC 12 or AIME problem.

    With equal probability the link points to an AMC 12A/12B problem
    (years 2015-2022, question 10-25) or an AIME I/II problem (years
    2005-2022, question 1-15).  The question number is drawn from a rounded
    normal distribution restricted to the allowed range, so values near the
    mean (18 for AMC, 6 for AIME) are picked more often.

    Args:
        rng: ``None`` (fresh OS entropy on every call), an integer seed, or
            an existing ``numpy.random.Generator``.  Passing a seed or a
            generator makes the result reproducible.

    Returns:
        The problem URL as a ``str``.
    """
    # A local generator is used instead of reseeding the global NumPy RNG:
    # reseeding the global state on every call would clobber any seed the
    # caller has set.  default_rng() passes an existing Generator through
    # unchanged.
    rng = np.random.default_rng(rng)

    def pick_question(mu, sigma, low, high):
        # Sample a batch, round to whole question numbers, keep the ones in
        # range, then pick one of the survivors uniformly.
        draws = np.round(rng.normal(mu, sigma, 1000))
        in_range = draws[(draws >= low) & (draws <= high)]
        return int(rng.choice(in_range))

    base = 'https://artofproblemsolving.com/wiki/index.php'
    if rng.random() < 0.5:
        # AMC
        year = rng.integers(2015, 2023)  # upper bound is exclusive
        variant = rng.choice(['A', 'B'])
        q = pick_question(18, 5, 10, 25)
        return f'{base}/{year}_AMC_12{variant}_Problems/Problem_{q}'

    # AIME
    year = rng.integers(2005, 2023)  # upper bound is exclusive
    variant = rng.choice(['I', 'II'])
    q = pick_question(6, 4, 1, 15)
    return f'{base}/{year}_AIME_{variant}_Problems/Problem_{q}'
def convert_to_renderable_html(text):
    """Make protocol-relative AoPS LaTeX image URLs absolute.

    Every ``//latex.artofproblemsolving.com`` that is not already preceded
    by a scheme (``https:`` / ``http:``) is rewritten to
    ``https://latex.artofproblemsolving.com``.

    Args:
        text: HTML fragment as a string.

    Returns:
        The fragment with the rewritten URLs.  Applying the function twice
        gives the same result as applying it once.
    """
    # The lookbehind skips URLs that already carry a scheme; a plain
    # str.replace would turn "https://latex..." into "https:https://latex...".
    return re.sub(
        r'(?<!:)//latex\.artofproblemsolving\.com',
        'https://latex.artofproblemsolving.com',
        text,
    )
def get_problem(url, timeout=10):
    """Fetch a wiki page and return the HTML of its "Problem" section.

    The page is downloaded with a browser-like User-Agent header.  The
    section is located through the ``<span class="mw-headline" id="Problem">``
    element; every ``<p>`` sibling following that headline's parent, up to
    the next ``<h2>``, is collected, passed through
    ``convert_to_renderable_html`` and joined with single spaces.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before ``requests`` gives
            up; without a timeout a stalled connection would block forever.

    Returns:
        The joined paragraph HTML (possibly an empty string), or ``None``
        after printing "No problem found" when the page has no such headline.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})
    if problem_headline is None:
        print("No problem found")
        return None

    problem_content = []
    for sibling in problem_headline.parent.find_next_siblings():
        if sibling.name == 'h2':
            # The next second-level heading ends the problem statement.
            break
        if sibling.name == 'p':
            problem_content.append(convert_to_renderable_html(str(sibling)))
    return " ".join(problem_content)
def gen_html(num, max_attempts=21):
    """Build an HTML page containing up to ``num`` randomly chosen problems.

    Links are produced by ``gen_link`` and fetched with ``get_problem``
    until ``num`` problems have been collected or ``max_attempts`` fetches
    have been made.  Each collected problem is preceded by a "to link"
    anchor pointing at its source page.

    Args:
        num: Number of problems wanted.  For ``num <= 0`` no request is made
            and the page body is empty.
        max_attempts: Upper bound on the number of fetch attempts.

    Returns:
        A complete HTML document as a string.  It may hold fewer than
        ``num`` problems when too many attempts fail.
    """
    sections = []
    attempts = 0
    # The condition is tested before each attempt so that num <= 0 does not
    # trigger a download.
    while len(sections) < num and attempts < max_attempts:
        attempts += 1
        try:
            link = gen_link()
            print(link)
            qhtml = get_problem(link)
        except Exception as e:
            # Best effort: a failed download must not abort the whole page.
            print(f"Error: {e}")
            continue
        if qhtml is None:
            # get_problem found no problem section (and already reported it).
            continue
        hype = f'<a href="{link}" target="_blank">to link</a>'
        sections.append(hype + qhtml)

    all_q = "".join(sections)
    return f'''
<html>
<head>
<style>
body {{
font-family: Arial, sans-serif;
font-size: 12pt;
}}
img.latex {{
font-size: 12pt; /* Ensure math font size matches the body text size */
}}
</style>
</head>
<body>
{all_q}
</body>
</html>
'''
def generate_pdf_content(html_content, timeout=10):
    """Flatten an HTML document into a list of drawable elements.

    The document is parsed with BeautifulSoup and every ``<p>``, ``<a>`` and
    ``<img>`` tag is turned into one tuple, in the order ``find_all``
    reports them:

    * ``('text', text)`` for a paragraph,
    * ``('link', href, text)`` for an anchor,
    * ``('image', PIL image)`` for an image, which is downloaded from its
      ``src`` attribute.

    Args:
        html_content: HTML source as a string.
        timeout: Seconds to wait for each image download.

    Returns:
        The list of element tuples.
    """
    # Use BeautifulSoup to parse the HTML content
    soup = BeautifulSoup(html_content, 'html.parser')

    # Extract text and images
    elements = []
    for tag in soup.find_all(['p', 'a', 'img']):
        if tag.name == 'p':
            elements.append(('text', tag.get_text()))
        elif tag.name == 'a':
            elements.append(('link', tag.get('href'), tag.get_text()))
        elif tag.name == 'img':
            img_url = tag.get('src')
            if not img_url:
                # An <img> without a source has nothing to download.
                continue
            if img_url.startswith('//'):
                # requests needs an explicit scheme for protocol-relative URLs.
                img_url = 'https:' + img_url
            response = requests.get(img_url, timeout=timeout)
            img = Image.open(io.BytesIO(response.content))
            elements.append(('image', img))
    return elements
def create_pdf(filename, content):
    """Render a list of elements to a letter-sized PDF file.

    Elements are drawn top to bottom, starting a new page whenever the next
    item would not fit above the bottom margin.

    Args:
        filename: Path of the PDF to write.
        content: Iterable of tuples whose first item selects the kind:
            ``('text', text)``, ``('link', href, text)`` (drawn as
            ``"text: href"``) or ``('image', PIL image)``.  Other kinds are
            ignored.

    Text is wrapped at word boundaries to the printable width; a single word
    wider than the page is not broken.  Images larger than the printable
    area are scaled down, keeping their aspect ratio.
    """
    font_name, font_size = "Helvetica", 12
    left_margin, top_margin, bottom_margin = 30, 40, 50
    line_height = 20
    image_gap = 20

    c = canvas.Canvas(filename, pagesize=letter)
    width, height = letter
    page_top = height - top_margin
    max_width = width - 2 * left_margin
    max_height = page_top - bottom_margin

    c.setFont(font_name, font_size)
    y = page_top

    def new_page():
        # showPage() resets the graphics state, so the font is set again.
        c.showPage()
        c.setFont(font_name, font_size)
        return page_top

    for elem in content:
        kind = elem[0]
        if kind in ('text', 'link'):
            text = elem[1] if kind == 'text' else f'{elem[2]}: {elem[1]}'
            # simpleSplit returns [] for empty text; one blank line is still
            # emitted so spacing between elements is kept.
            lines = simpleSplit(text, font_name, font_size, max_width) or ['']
            for line in lines:
                if y < bottom_margin:
                    y = new_page()
                c.drawString(left_margin, y, line)
                y -= line_height
        elif kind == 'image':
            img = elem[1]
            img_w, img_h = img.size
            if not img_w or not img_h:
                # A zero-sized image has nothing to draw.
                continue
            # Shrink (never enlarge) so the image fits the printable area.
            scale = min(1.0, max_width / img_w, max_height / img_h)
            draw_w, draw_h = img_w * scale, img_h * scale
            # Break the page *before* drawing when the image would cross the
            # bottom margin; skipped at the top of a page to avoid blanks.
            if y - draw_h < bottom_margin and y < page_top:
                y = new_page()
            c.drawImage(ImageReader(img), left_margin, y - draw_h,
                        width=draw_w, height=draw_h)
            y -= draw_h + image_gap
    c.save()
def convert_html_to_pdf(html_content, output_filename):
    """Convert an HTML string to a PDF written at ``output_filename``.

    The HTML is first broken into drawable elements by
    ``generate_pdf_content``; those elements are then rendered by
    ``create_pdf``.
    """
    create_pdf(output_filename, generate_pdf_content(html_content))
|