Spaces:

ymcmy
/

AMC_AIME_Random_Problem_Set_Generator

Running

App Files Community

ymcmy committed on Jul 25, 2024

Commit

bae9501

verified ·

1 Parent(s): e6b5626

Update utils.py

Browse files

Files changed (1) hide show

utils.py +41 -95

utils.py CHANGED Viewed

@@ -1,37 +1,35 @@
 from bs4 import BeautifulSoup
 import requests
 import numpy as np
 from datetime import datetime
-from reportlab.lib.pagesizes import letter
-from reportlab.pdfgen import canvas
-from reportlab.lib.utils import ImageReader
-import io
-from PIL import Image
 def gen_link():
-    if np.random.choice([True, False]):
-        # AMC
         np.random.seed()
-        year = np.random.randint(2015, 2023)
-        AB = np.random.choice(['A', 'B'])
-        # Question
-        mu, sigma = 18, 5
-        s = np.random.normal(mu, sigma, 1000)
         s = np.round(s)
-        s = s[(s >= 10) & (s <= 25)]
-        q = int(np.random.choice(s))
-        link = f'https://artofproblemsolving.com/wiki/index.php/{year}_AMC_12{AB}_Problems/Problem_{q}'
     else:
-        # AIME
         np.random.seed()
-        year = np.random.randint(2005, 2023)
-        I = np.random.choice(['I', 'II'])
-        mu, sigma = 6, 4
-        s = np.random.normal(mu, sigma, 1000)
         s = np.round(s)
-        s = s[(s >= 1) & (s <= 15)]
-        q = int(np.random.choice(s))
-        link = f'https://artofproblemsolving.com/wiki/index.php/{year}_AIME_{I}_Problems/Problem_{q}'
     return link
 def convert_to_renderable_html(text):
@@ -40,106 +38,54 @@ def convert_to_renderable_html(text):
 def get_problem(url):
     headers = {
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
     }
     response = requests.get(url, headers=headers)
     soup = BeautifulSoup(response.text, 'html.parser')
     problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})
     if problem_headline:
         problem_content = []
         for sibling in problem_headline.parent.find_next_siblings():
             if sibling.name == 'h2':
                 break
             elif sibling.name == 'p':
                 problem_content.append(convert_to_renderable_html(str(sibling)))
         problem_html = " ".join(problem_content)
         return problem_html
     else:
         print("No problem found")
 def gen_html(num):
-    all_q = str()
-    num_tried = 0
-    num_succ = 0
-    while True:
         try:
-            link = gen_link()
             print(link)
-            hype = f'<a href="{link}" target="_blank">to link</a>'
-            qhtml = get_problem(link)
-            all_q += (hype + qhtml)
-            num_succ += 1
-        except Exception as e:
-            print(f"Error: {e}")
             pass
-        num_tried += 1
-        if num_succ >= num or num_tried > 20:
             break
-    all_q = f'''
     <html>
     <head>
-        <style>
-            body {{
-                font-family: Arial, sans-serif;
-                font-size: 12pt;
-            }}
-            img.latex {{
-                font-size: 12pt; /* Ensure math font size matches the body text size */
-            }}
-        </style>
     </head>
     <body>
-        {all_q}
     </body>
     </html>
     '''
     return all_q
-def generate_pdf_content(html_content):
-    # Use BeautifulSoup to parse the HTML content
-    soup = BeautifulSoup(html_content, 'html.parser')
-    # Extract text and images
-    elements = []
-    for tag in soup.find_all(['p', 'a', 'img']):
-        if tag.name == 'p':
-            elements.append(('text', tag.get_text()))
-        elif tag.name == 'a':
-            elements.append(('link', tag.get('href'), tag.get_text()))
-        elif tag.name == 'img':
-            img_url = tag.get('src')
-            response = requests.get(img_url)
-            img = Image.open(io.BytesIO(response.content))
-            elements.append(('image', img))
-    return elements
-def create_pdf(filename, content):
-    c = canvas.Canvas(filename, pagesize=letter)
-    width, height = letter
-    y = height - 40
-    for elem in content:
-        if elem[0] == 'text':
-            c.drawString(30, y, elem[1])
-            y -= 20
-        elif elem[0] == 'link':
-            c.drawString(30, y, f'{elem[2]}: {elem[1]}')
-            y -= 20
-        elif elem[0] == 'image':
-            img_reader = ImageReader(elem[1])
-            c.drawImage(img_reader, 30, y - elem[1].size[1], width=elem[1].size[0], height=elem[1].size[1])
-            y -= elem[1].size[1] + 20
-        if y < 50:
-            c.showPage()
-            y = height - 40
-    c.save()
-def convert_html_to_pdf(html_content, output_filename):
-    content = generate_pdf_content(html_content)
-    create_pdf(output_filename, content)

 from bs4 import BeautifulSoup
 import requests
 import numpy as np
+import pdfkit
 from datetime import datetime
 def gen_link():
+    if(np.random.choice([True, False])):
+        #amc
         np.random.seed()
+        year=np.random.randint(2015,2023)
+        AB=np.random.choice(['A', 'B'])
+        #question
+        mu,sigma=18, 5
+        s=np.random.normal(mu,sigma,1000)
         s = np.round(s)
+        s=s[s>=10]
+        s=s[s<=25]
+        q=int(np.random.choice(s))
+        link='https://artofproblemsolving.com/wiki/index.php/{}_AMC_12{}_Problems/Problem_{}'.format(year, AB, q)
     else:
+        #aime
         np.random.seed()
+        year=np.random.randint(2005,2023)
+        I=np.random.choice(['I', 'II'])
+        mu,sigma=6, 4
+        s=np.random.normal(mu,sigma,1000)
         s = np.round(s)
+        s=s[s>=1]
+        s=s[s<=15]
+        q=int(np.random.choice(s))
+        link='https://artofproblemsolving.com/wiki/index.php/{}_AIME_{}_Problems/Problem_{}'.format(year,I, q)
     return link
 def convert_to_renderable_html(text):
 def get_problem(url):
     headers = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
     }
     response = requests.get(url, headers=headers)
     soup = BeautifulSoup(response.text, 'html.parser')
     problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})
     if problem_headline:
         problem_content = []
         for sibling in problem_headline.parent.find_next_siblings():
+            # If the sibling is a headline ('h2'), break the loop, as we've reached the next section
             if sibling.name == 'h2':
                 break
+            # If the sibling is a paragraph ('p'), add it to the problem content
             elif sibling.name == 'p':
                 problem_content.append(convert_to_renderable_html(str(sibling)))
+        # Join all paragraphs into a single string (HTML)
         problem_html = " ".join(problem_content)
         return problem_html
     else:
         print("No problem found")
 def gen_html(num):
+    all_q=str()
+    num_tried=0
+    num_succ=0
+    while(True):
         try:
+            link=gen_link()
             print(link)
+            hype = '<a href="{}" target="_blank">to link</a>'.format(link)
+            qhtml=get_problem(link)
+            all_q+=(hype+qhtml)
+            num_succ+=1
+        except:
             pass
+        num_tried+=1
+        if num_succ>=num or num_tried>20:
             break
+    all_q=f'''
     <html>
     <head>
     </head>
     <body>
+    {all_q}
     </body>
     </html>
     '''
     return all_q