| | from bs4 import BeautifulSoup |
| | import requests |
| | import numpy as np |
| | from datetime import datetime |
| | from reportlab.lib.pagesizes import letter |
| | from reportlab.pdfgen import canvas |
| | from reportlab.lib.utils import ImageReader |
| | import io |
| | from PIL import Image |
| |
|
def _sample_problem_number(rng, mu, sigma, low, high):
    """Draw a rounded normal(mu, sigma) value that lies within [low, high]."""
    # Rejection sampling: keep drawing until the rounded value is in range.
    # This cannot fail on an empty candidate pool the way filtering a fixed
    # batch of samples could.
    while True:
        value = int(np.round(rng.normal(mu, sigma)))
        if low <= value <= high:
            return value


def gen_link():
    """Return the URL of a randomly chosen AMC 12 or AIME problem page.

    With equal probability the link points to either:

    * an AMC 12A/12B problem from a year in 2015-2022, with the problem
      number drawn from a normal(18, 5) distribution restricted to 10-25, or
    * an AIME I/II problem from a year in 2005-2022, with the problem
      number drawn from a normal(6, 4) distribution restricted to 1-15.

    Returns:
        str: A URL under ``https://artofproblemsolving.com/wiki/index.php/``.
    """
    # A private, freshly seeded generator gives a different result on every
    # call without reseeding (and thereby clobbering) the global NumPy RNG.
    rng = np.random.default_rng()
    base = 'https://artofproblemsolving.com/wiki/index.php'

    if rng.random() < 0.5:
        year = int(rng.integers(2015, 2023))  # upper bound is exclusive
        variant = rng.choice(['A', 'B'])
        q = _sample_problem_number(rng, mu=18, sigma=5, low=10, high=25)
        return f'{base}/{year}_AMC_12{variant}_Problems/Problem_{q}'

    year = int(rng.integers(2005, 2023))  # upper bound is exclusive
    variant = rng.choice(['I', 'II'])
    q = _sample_problem_number(rng, mu=6, sigma=4, low=1, high=15)
    return f'{base}/{year}_AIME_{variant}_Problems/Problem_{q}'
| |
|
def convert_to_renderable_html(text):
    """Make protocol-relative LaTeX image URLs absolute.

    Every ``//latex.artofproblemsolving.com`` reference that is not already
    preceded by a scheme (``http:`` / ``https:``) is rewritten to
    ``https://latex.artofproblemsolving.com``. URLs that are already absolute
    are left untouched, so calling the function repeatedly is safe.

    Args:
        text: HTML markup as a string.

    Returns:
        str: The markup with the LaTeX host references made absolute.
    """
    import re  # local import keeps this block self-contained

    # The negative lookbehind skips occurrences that already carry a scheme;
    # a plain str.replace would turn "https://latex..." into
    # "https:https://latex...".
    return re.sub(
        r'(?<!:)//latex\.artofproblemsolving\.com',
        'https://latex.artofproblemsolving.com',
        text,
    )
| |
|
def get_problem(url, timeout=10):
    """Fetch a wiki problem page and return the HTML of its problem statement.

    The page is searched for the ``<span class="mw-headline" id="Problem">``
    headline. Every ``<p>`` sibling following the headline's parent element,
    up to (but not including) the next ``<h2>``, is collected, passed through
    ``convert_to_renderable_html`` and joined with single spaces.

    Args:
        url: Address of the problem page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        str | None: The problem statement HTML, or ``None`` (after printing
        "No problem found") when the page has no "Problem" headline.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})

    if problem_headline is None:
        print("No problem found")
        return None

    problem_content = []
    for sibling in problem_headline.parent.find_next_siblings():
        if sibling.name == 'h2':
            # The next section heading marks the end of the problem statement.
            break
        if sibling.name == 'p':
            problem_content.append(convert_to_renderable_html(str(sibling)))

    return " ".join(problem_content)
| |
|
def gen_html(num, max_attempts=21):
    """Build an HTML page containing up to ``num`` randomly chosen problems.

    Random problem links are generated with ``gen_link`` and fetched with
    ``get_problem`` until ``num`` problems have been collected or
    ``max_attempts`` attempts have been made. Each collected problem is
    preceded by a "to link" anchor pointing at its source page.

    Args:
        num: Number of problems wanted. For ``num <= 0`` no request is made
            and the page body is empty.
        max_attempts: Upper bound on fetch attempts (successful or not). The
            default of 21 matches the previous hard-coded limit.

    Returns:
        str: A complete HTML document. It may contain fewer than ``num``
        problems if the attempt budget ran out.
    """
    entries = []
    attempts = 0
    # Checking the condition before each attempt means num <= 0 fetches nothing.
    while len(entries) < num and attempts < max_attempts:
        attempts += 1
        try:
            link = gen_link()
            print(link)
            qhtml = get_problem(link)
        except Exception as e:
            # Best effort: a failed fetch only costs one attempt.
            print(f"Error: {e}")
            continue
        if qhtml is None:
            # get_problem found no problem statement on that page.
            continue
        hype = f'<a href="{link}" target="_blank">to link</a>'
        entries.append(hype + qhtml)

    all_q = "".join(entries)
    return f'''
<html>
<head>
<style>
body {{
font-family: Arial, sans-serif;
font-size: 12pt;
}}
img.latex {{
font-size: 12pt; /* Ensure math font size matches the body text size */
}}
</style>
</head>
<body>
{all_q}
</body>
</html>
'''
| |
|
def generate_pdf_content(html_content, timeout=10):
    """Turn an HTML document into a flat list of drawable PDF elements.

    All ``<p>``, ``<a>`` and ``<img>`` tags are visited in the order
    ``find_all`` yields them and converted to tuples:

    * ``('text', text)`` for a paragraph's text content,
    * ``('link', href, text)`` for an anchor,
    * ``('image', img)`` for an image, where ``img`` is the downloaded
      picture opened as a PIL image.

    Args:
        html_content: HTML markup as a string.
        timeout: Seconds to wait for each image download before giving up.

    Returns:
        list[tuple]: The elements, in document order.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    elements = []
    for tag in soup.find_all(['p', 'a', 'img']):
        if tag.name == 'p':
            elements.append(('text', tag.get_text()))
        elif tag.name == 'a':
            elements.append(('link', tag.get('href'), tag.get_text()))
        elif tag.name == 'img':
            img_url = tag.get('src')
            if not img_url:
                # An <img> without a source has nothing to download.
                continue
            if img_url.startswith('//'):
                # requests needs an explicit scheme for protocol-relative URLs.
                img_url = 'https:' + img_url
            response = requests.get(img_url, timeout=timeout)
            img = Image.open(io.BytesIO(response.content))
            elements.append(('image', img))

    return elements
| |
|
def create_pdf(filename, content):
    """Write the given elements to a letter-sized PDF, top to bottom.

    Args:
        filename: Path of the PDF file to create.
        content: Iterable of element tuples: ``('text', text)``,
            ``('link', href, text)`` or ``('image', pil_image)``. Elements
            of any other kind are ignored.

    Text and link elements are drawn as a single line each. Images are drawn
    at their pixel size in points, scaled down only when they would not fit
    inside the page margins. A new page is started when the cursor drops
    below the bottom margin, or beforehand when the next image would not fit
    in the space remaining on the current page.
    """
    left_margin = 30
    top_margin = 40
    bottom_margin = 50
    line_gap = 20

    c = canvas.Canvas(filename, pagesize=letter)
    width, height = letter
    top_y = height - top_margin
    max_img_w = width - 2 * left_margin
    max_img_h = top_y - bottom_margin
    y = top_y

    for elem in content:
        kind = elem[0]
        if kind == 'text':
            c.drawString(left_margin, y, elem[1])
            y -= line_gap
        elif kind == 'link':
            c.drawString(left_margin, y, f'{elem[2]}: {elem[1]}')
            y -= line_gap
        elif kind == 'image':
            img = elem[1]
            img_w, img_h = img.size
            if img_w <= 0 or img_h <= 0:
                # Nothing to draw; also avoids dividing by zero below.
                continue
            # Shrink only when needed, so images that already fit keep their size.
            scale = min(1.0, max_img_w / img_w, max_img_h / img_h)
            draw_w = img_w * scale
            draw_h = img_h * scale
            if y - draw_h < bottom_margin and y < top_y:
                # Break the page before drawing so the image is not drawn
                # past the bottom edge.
                c.showPage()
                y = top_y
            c.drawImage(ImageReader(img), left_margin, y - draw_h, width=draw_w, height=draw_h)
            y -= draw_h + line_gap

        if y < bottom_margin:
            c.showPage()
            y = top_y

    c.save()
| |
|
def convert_html_to_pdf(html_content, output_filename):
    """Render ``html_content`` into a PDF file at ``output_filename``.

    The HTML is first turned into drawable elements by
    ``generate_pdf_content``; those elements are then written out by
    ``create_pdf``.
    """
    create_pdf(output_filename, generate_pdf_content(html_content))
| |
|