Spaces:

ymcmy
/

AMC_AIME_Random_Problem_Set_Generator

Sleeping

App Files Files Community

AMC_AIME_Random_Problem_Set_Generator / utils.py

ymcmy

Update utils.py

cfd35b8 verified over 1 year ago

raw

history blame

4.48 kB

	import io
	import re
	from datetime import datetime

	import numpy as np
	import requests
	from bs4 import BeautifulSoup
	from PIL import Image
	from reportlab.lib.pagesizes import letter
	from reportlab.lib.utils import ImageReader
	from reportlab.lib.utils import simpleSplit
	from reportlab.pdfgen import canvas

	def _draw_problem_number(mean, spread, lowest, highest):
	    """Pick one problem number from a rounded normal sample kept within [lowest, highest]."""
	    draws = np.round(np.random.normal(mean, spread, 1000))
	    in_range = (draws >= lowest) & (draws <= highest)
	    return int(np.random.choice(draws[in_range]))


	def gen_link():
	    """Return the URL of a randomly chosen AoPS wiki problem page.

	    A coin flip selects between an AMC 12 problem (years 2015-2022, contest
	    A or B, problem numbers 10-25 centred on 18) and an AIME problem (years
	    2005-2022, contest I or II, problem numbers 1-15 centred on 6).  The
	    upper bound passed to ``np.random.randint`` is exclusive, hence 2023.
	    """
	    use_amc = np.random.choice([True, False])
	    # The generator is re-seeded right after the coin flip, before any
	    # other draw is made.
	    np.random.seed()

	    if not use_amc:
	        # AIME
	        year = np.random.randint(2005, 2023)
	        part = np.random.choice(['I', 'II'])
	        number = _draw_problem_number(6, 4, 1, 15)
	        return f'https://artofproblemsolving.com/wiki/index.php/{year}_AIME_{part}_Problems/Problem_{number}'

	    # AMC
	    year = np.random.randint(2015, 2023)
	    series = np.random.choice(['A', 'B'])
	    number = _draw_problem_number(18, 5, 10, 25)
	    return f'https://artofproblemsolving.com/wiki/index.php/{year}_AMC_12{series}_Problems/Problem_{number}'

	def convert_to_renderable_html(text):
	    """Turn protocol-relative AoPS LaTeX image URLs into absolute https URLs.

	    Occurrences of ``//latex.artofproblemsolving.com`` that are not already
	    preceded by a scheme (``http:`` / ``https:``) get ``https:`` prepended, so
	    the images load outside the AoPS site.  URLs that already carry a scheme
	    are left alone, which makes the function safe to apply more than once.

	    Args:
	        text: HTML fragment as a string.

	    Returns:
	        The fragment with the LaTeX image host made absolute.
	    """
	    # The lookbehind skips matches that directly follow a ':' (i.e. an
	    # existing scheme); a plain str.replace would produce "https:https://…".
	    return re.sub(
	        r'(?<!:)//latex\.artofproblemsolving\.com',
	        'https://latex.artofproblemsolving.com',
	        text,
	    )

	def get_problem(url, timeout=10):
	    """Download an AoPS wiki page and return its problem statement as HTML.

	    The statement is every ``<p>`` element that follows the heading holding
	    ``<span class="mw-headline" id="Problem">``, up to (not including) the
	    next ``<h2>``.  Each paragraph is passed through
	    ``convert_to_renderable_html`` so its LaTeX images use absolute URLs.

	    Args:
	        url: Address of the wiki page to fetch.
	        timeout: Seconds to wait for the server before ``requests`` gives up.
	            Without a timeout a stalled connection would block forever.

	    Returns:
	        The paragraphs joined by single spaces, or ``None`` (after printing
	        "No problem found") when the page has no "Problem" headline.

	    Raises:
	        requests.RequestException: If the page cannot be fetched in time.
	    """
	    # Browser-like User-Agent sent with the request.
	    headers = {
	        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
	    }
	    response = requests.get(url, headers=headers, timeout=timeout)
	    soup = BeautifulSoup(response.text, 'html.parser')
	    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})

	    if problem_headline is None:
	        print("No problem found")
	        return None

	    problem_content = []
	    # The headline span sits inside the heading element; the statement
	    # paragraphs are that heading's following siblings.
	    for sibling in problem_headline.parent.find_next_siblings():
	        if sibling.name == 'h2':
	            break  # next section (e.g. a solution) starts here
	        if sibling.name == 'p':
	            problem_content.append(convert_to_renderable_html(str(sibling)))

	    return " ".join(problem_content)

	def gen_html(num, max_attempts=21):
	    """Build an HTML page containing up to ``num`` random problems.

	    Random links are drawn with ``gen_link`` and fetched with ``get_problem``
	    until ``num`` problems have been collected or ``max_attempts`` fetches have
	    been made, whichever comes first.  Each collected problem is preceded by
	    a "to link" anchor pointing at its source page.

	    Args:
	        num: Number of problems wanted.  Zero or a negative value yields a
	            page with an empty body and performs no network access.
	        max_attempts: Upper bound on fetch attempts; the default of 21 is the
	            limit that used to be hard-coded.

	    Returns:
	        A complete HTML document as a string.  It may hold fewer than ``num``
	        problems if too many attempts failed.
	    """
	    sections = []
	    attempts = 0
	    while len(sections) < num and attempts < max_attempts:
	        attempts += 1
	        try:
	            link = gen_link()
	            print(link)
	            qhtml = get_problem(link)
	        except Exception as e:
	            # Deliberately best-effort: a failure on one randomly chosen
	            # page only costs an attempt; another page is drawn instead.
	            print(f"Error: {e}")
	            continue
	        if not qhtml:
	            # No problem statement on that page (None or empty string);
	            # do not count it as a collected problem.
	            continue
	        hype = f'<a href="{link}" target="_blank">to link</a>'
	        sections.append(hype + qhtml)

	    body = "".join(sections)
	    return f'''
	<html>
	<head>
	<style>
	body {{
	font-family: Arial, sans-serif;
	font-size: 12pt;
	}}
	img.latex {{
	font-size: 12pt; /* Ensure math font size matches the body text size */
	}}
	</style>
	</head>
	<body>
	{body}
	</body>
	</html>
	'''

	def generate_pdf_content(html_content, timeout=10):
	    """Extract the text, links and images of an HTML document, in order.

	    Every ``<p>``, ``<a>`` and ``<img>`` element found by BeautifulSoup is
	    turned into one tuple:

	    * ``('text', paragraph_text)`` for ``<p>``
	    * ``('link', href, anchor_text)`` for ``<a>``
	    * ``('image', PIL_image)`` for ``<img>``, after downloading its ``src``

	    Images are handled best-effort: an ``<img>`` without a ``src``, or one
	    whose download or decoding fails, is skipped (with a printed message)
	    so a single bad image does not abort the whole extraction.

	    Args:
	        html_content: HTML document as a string.
	        timeout: Seconds to wait for each image download.

	    Returns:
	        A list of the tuples described above.
	    """
	    # Use BeautifulSoup to parse the HTML content
	    soup = BeautifulSoup(html_content, 'html.parser')

	    # Extract text and images
	    elements = []
	    for tag in soup.find_all(['p', 'a', 'img']):
	        if tag.name == 'p':
	            elements.append(('text', tag.get_text()))
	        elif tag.name == 'a':
	            elements.append(('link', tag.get('href'), tag.get_text()))
	        elif tag.name == 'img':
	            img_url = tag.get('src')
	            if not img_url:
	                continue  # nothing to download
	            if img_url.startswith('//'):
	                # requests needs an explicit scheme for protocol-relative URLs
	                img_url = 'https:' + img_url
	            try:
	                response = requests.get(img_url, timeout=timeout)
	                response.raise_for_status()
	                img = Image.open(io.BytesIO(response.content))
	                # Image.open is lazy; decode now so corrupt data is caught
	                # here rather than later while drawing the PDF.
	                img.load()
	            except (requests.RequestException, OSError) as e:
	                print(f"Error: {e}")
	                continue
	            elements.append(('image', img))

	    return elements

	def create_pdf(filename, content):
	    """Lay out extracted elements top-to-bottom on letter-size pages.

	    Args:
	        filename: Path (or file-like object) handed to ``canvas.Canvas``.
	        content: Iterable of tuples as produced by ``generate_pdf_content``:
	            ``('text', str)``, ``('link', href, label)`` or
	            ``('image', PIL_image)``.  Other kinds are ignored.

	    Layout rules:
	        * Text and links are word-wrapped to the printable width and drawn
	          one line every 20 points.
	        * Images larger than the printable area are scaled down, keeping
	          their aspect ratio.
	        * A new page is started *before* an element that would not fit, so
	          nothing is drawn below the bottom margin.
	    """
	    left_margin, top_margin, bottom_margin = 30, 40, 50
	    line_step = 20
	    font_name, font_size = 'Helvetica', 12

	    c = canvas.Canvas(filename, pagesize=letter)
	    width, height = letter
	    page_top = height - top_margin
	    usable_width = width - 2 * left_margin
	    usable_height = page_top - bottom_margin

	    # Set the font explicitly so the wrapping measurements below match
	    # what drawString actually renders.
	    c.setFont(font_name, font_size)
	    y = page_top

	    def start_new_page():
	        """Finish the current page and return the starting y of the next."""
	        c.showPage()
	        c.setFont(font_name, font_size)  # graphics state resets per page
	        return page_top

	    for elem in content:
	        kind = elem[0]
	        if kind in ('text', 'link'):
	            text = elem[1] if kind == 'text' else f'{elem[2]}: {elem[1]}'
	            # simpleSplit returns [] for empty text; still consume one line
	            # so empty paragraphs keep their vertical spacing.
	            lines = simpleSplit(text, font_name, font_size, usable_width) or ['']
	            for line in lines:
	                if y < bottom_margin:
	                    y = start_new_page()
	                c.drawString(left_margin, y, line)
	                y -= line_step
	        elif kind == 'image':
	            img = elem[1]
	            img_w, img_h = img.size
	            if img_w <= 0 or img_h <= 0:
	                continue
	            # Never enlarge; only shrink to fit the printable area.
	            scale = min(1.0, usable_width / img_w, usable_height / img_h)
	            draw_w, draw_h = img_w * scale, img_h * scale
	            # Break first if the image would cross the bottom margin
	            # (unless we are already at the top of a fresh page).
	            if y - draw_h < bottom_margin and y < page_top:
	                y = start_new_page()
	            c.drawImage(ImageReader(img), left_margin, y - draw_h, width=draw_w, height=draw_h)
	            y -= draw_h + line_step

	    c.save()

	def convert_html_to_pdf(html_content, output_filename):
	    """Extract the elements of ``html_content`` and write them to a PDF.

	    The HTML is first reduced to a list of elements by
	    ``generate_pdf_content``; that list is then handed to ``create_pdf``
	    together with ``output_filename``.
	    """
	    create_pdf(output_filename, generate_pdf_content(html_content))