ymcmy committed on
Commit
bae9501
·
verified ·
1 Parent(s): e6b5626

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +41 -95
utils.py CHANGED
@@ -1,37 +1,35 @@
1
  from bs4 import BeautifulSoup
2
  import requests
3
  import numpy as np
 
4
  from datetime import datetime
5
- from reportlab.lib.pagesizes import letter
6
- from reportlab.pdfgen import canvas
7
- from reportlab.lib.utils import ImageReader
8
- import io
9
- from PIL import Image
10
 
11
def _sample_problem_number(rng, mean, spread, lowest, highest):
    """Draw a problem number from a rounded normal distribution limited to a range.

    A normal sample is rounded to the nearest integer and redrawn until it
    lands inside ``[lowest, highest]`` (both inclusive).
    """
    while True:
        number = int(round(rng.normal(mean, spread)))
        if lowest <= number <= highest:
            return number


def gen_link():
    """Return the AoPS wiki URL of a randomly chosen AMC 12 or AIME problem.

    With equal probability the link points at either:

    * an AMC 12A/12B problem from 2015-2022, problem number drawn around 18
      (standard deviation 5) and limited to 10-25, or
    * an AIME I/II problem from 2005-2022, problem number drawn around 6
      (standard deviation 4) and limited to 1-15.

    Returns:
        str: The wiki URL of the selected problem.
    """
    # A private generator seeded from OS entropy gives fresh draws on every
    # call without reseeding NumPy's global random state.
    rng = np.random.default_rng()
    base = 'https://artofproblemsolving.com/wiki/index.php'

    if rng.random() < 0.5:
        # AMC
        year = int(rng.integers(2015, 2023))  # upper bound is exclusive
        contest = str(rng.choice(['A', 'B']))
        number = _sample_problem_number(rng, 18, 5, 10, 25)
        return f'{base}/{year}_AMC_12{contest}_Problems/Problem_{number}'

    # AIME
    year = int(rng.integers(2005, 2023))  # upper bound is exclusive
    contest = str(rng.choice(['I', 'II']))
    number = _sample_problem_number(rng, 6, 4, 1, 15)
    return f'{base}/{year}_AIME_{contest}_Problems/Problem_{number}'
36
 
37
  def convert_to_renderable_html(text):
@@ -40,106 +38,54 @@ def convert_to_renderable_html(text):
40
 
41
def get_problem(url):
    """Download a wiki page and return its problem statement as HTML.

    Args:
        url: Address of the page to fetch.

    Returns:
        The ``<p>`` elements that follow the heading whose span has
        ``id="Problem"`` (up to the next ``<h2>``), each passed through
        ``convert_to_renderable_html`` and joined with single spaces.
        ``None`` when the page has no such heading.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')
    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})

    if problem_headline is None:
        print("No problem found")
        return None

    paragraphs = []
    for sibling in problem_headline.parent.find_next_siblings():
        if sibling.name == 'h2':
            # The next section heading ends the problem statement.
            break
        if sibling.name == 'p':
            paragraphs.append(convert_to_renderable_html(str(sibling)))

    return " ".join(paragraphs)
61
 
62
def gen_html(num):
    """Build an HTML page containing up to ``num`` randomly selected problems.

    Each problem is preceded by a link to its source page. Links come from
    ``gen_link`` and problem bodies from ``get_problem``; pages without a
    problem and failed downloads are skipped.

    Args:
        num: Number of problems wanted.

    Returns:
        str: A complete HTML document. It may hold fewer than ``num``
        problems because the loop gives up after 21 attempts.
    """
    sections = []
    attempts = 0
    while True:
        try:
            link = gen_link()
            print(link)
            problem_html = get_problem(link)
            # get_problem returns None when the page has no problem section.
            if problem_html is not None:
                anchor = f'<a href="{link}" target="_blank">to link</a>'
                sections.append(anchor + problem_html)
        except Exception as e:
            print(f"Error: {e}")
        attempts += 1
        if len(sections) >= num or attempts > 20:
            break

    body = "".join(sections)
    return f'''
    <html>
    <head>
    <style>
    body {{
        font-family: Arial, sans-serif;
        font-size: 12pt;
    }}
    img.latex {{
        font-size: 12pt; /* Ensure math font size matches the body text size */
    }}
    </style>
    </head>
    <body>
    {body}
    </body>
    </html>
    '''
100
-
101
def generate_pdf_content(html_content):
    """Extract the paragraphs, links and images of an HTML document.

    Every ``<p>``, ``<a>`` and ``<img>`` tag found in the document yields one
    entry, in document order:

    * ``('text', text)`` for a paragraph,
    * ``('link', href, text)`` for an anchor,
    * ``('image', img)`` for an image, where ``img`` is the downloaded
      picture opened with PIL.

    Args:
        html_content: HTML markup to scan.

    Returns:
        list: The extracted entries.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    elements = []
    for tag in soup.find_all(['p', 'a', 'img']):
        if tag.name == 'p':
            elements.append(('text', tag.get_text()))
        elif tag.name == 'a':
            elements.append(('link', tag.get('href'), tag.get_text()))
        elif tag.name == 'img':
            img_url = tag.get('src')
            if not img_url:
                # An <img> without a source has nothing to download.
                continue
            # Without a timeout a stalled image host would block forever.
            response = requests.get(img_url, timeout=10)
            img = Image.open(io.BytesIO(response.content))
            elements.append(('image', img))

    return elements
119
-
120
def create_pdf(filename, content):
    """Write extracted page elements to a letter-sized PDF file.

    Args:
        filename: Path of the PDF to create.
        content: Sequence of entries as produced by ``generate_pdf_content``:
            ``('text', text)``, ``('link', href, text)`` or ``('image', img)``.
            Entries of any other kind are ignored.
    """
    left_margin = 30
    bottom_margin = 50
    line_gap = 20
    page_top = letter[1] - 40

    pdf = canvas.Canvas(filename, pagesize=letter)
    y = page_top

    for elem in content:
        kind = elem[0]
        if kind == 'text':
            pdf.drawString(left_margin, y, elem[1])
            y -= line_gap
        elif kind == 'link':
            pdf.drawString(left_margin, y, f'{elem[2]}: {elem[1]}')
            y -= line_gap
        elif kind == 'image':
            image = elem[1]
            img_width, img_height = image.size
            # Move to a fresh page first when the image does not fit in the
            # space left on this one; otherwise it would be drawn past the
            # bottom margin. Skipped at the top of a page, where a new page
            # would not help.
            if y - img_height < bottom_margin and y < page_top:
                pdf.showPage()
                y = page_top
            pdf.drawImage(ImageReader(image), left_margin, y - img_height,
                          width=img_width, height=img_height)
            y -= img_height + line_gap

        if y < bottom_margin:
            pdf.showPage()
            y = page_top

    pdf.save()
142
-
143
def convert_html_to_pdf(html_content, output_filename):
    """Convert HTML markup into a PDF file.

    The markup is broken into elements by ``generate_pdf_content`` and those
    elements are written to ``output_filename`` by ``create_pdf``.
    """
    create_pdf(output_filename, generate_pdf_content(html_content))
 
1
  from bs4 import BeautifulSoup
2
  import requests
3
  import numpy as np
4
+ import pdfkit
5
  from datetime import datetime
 
 
 
 
 
6
 
7
def _sample_problem_number(rng, mean, spread, lowest, highest):
    """Draw a problem number from a rounded normal distribution limited to a range.

    A normal sample is rounded to the nearest integer and redrawn until it
    lands inside ``[lowest, highest]`` (both inclusive).
    """
    while True:
        number = int(round(rng.normal(mean, spread)))
        if lowest <= number <= highest:
            return number


def gen_link():
    """Return the AoPS wiki URL of a randomly chosen AMC 12 or AIME problem.

    With equal probability the link points at either:

    * an AMC 12A/12B problem from 2015-2022, problem number drawn around 18
      (standard deviation 5) and limited to 10-25, or
    * an AIME I/II problem from 2005-2022, problem number drawn around 6
      (standard deviation 4) and limited to 1-15.

    Returns:
        str: The wiki URL of the selected problem.
    """
    # A private generator seeded from OS entropy gives fresh draws on every
    # call without reseeding NumPy's global random state.
    rng = np.random.default_rng()
    base = 'https://artofproblemsolving.com/wiki/index.php'

    if rng.random() < 0.5:
        # amc
        year = int(rng.integers(2015, 2023))  # upper bound is exclusive
        contest = str(rng.choice(['A', 'B']))
        number = _sample_problem_number(rng, 18, 5, 10, 25)
        return f'{base}/{year}_AMC_12{contest}_Problems/Problem_{number}'

    # aime
    year = int(rng.integers(2005, 2023))  # upper bound is exclusive
    contest = str(rng.choice(['I', 'II']))
    number = _sample_problem_number(rng, 6, 4, 1, 15)
    return f'{base}/{year}_AIME_{contest}_Problems/Problem_{number}'
34
 
35
  def convert_to_renderable_html(text):
 
38
 
39
def get_problem(url):
    """Download a wiki page and return its problem statement as HTML.

    Args:
        url: Address of the page to fetch.

    Returns:
        The ``<p>`` elements that follow the heading whose span has
        ``id="Problem"`` (up to the next ``<h2>``), each passed through
        ``convert_to_renderable_html`` and joined with single spaces.
        ``None`` when the page has no such heading.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')
    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})

    if problem_headline is None:
        print("No problem found")
        return None

    paragraphs = []
    for sibling in problem_headline.parent.find_next_siblings():
        if sibling.name == 'h2':
            # A headline means the next section has been reached.
            break
        if sibling.name == 'p':
            paragraphs.append(convert_to_renderable_html(str(sibling)))

    # Join all paragraphs into a single HTML string.
    return " ".join(paragraphs)
62
 
63
def gen_html(num):
    """Build an HTML page containing up to ``num`` randomly selected problems.

    Each problem is preceded by a link to its source page. Links come from
    ``gen_link`` and problem bodies from ``get_problem``; pages without a
    problem and failed downloads are skipped.

    Args:
        num: Number of problems wanted.

    Returns:
        str: A complete HTML document. It may hold fewer than ``num``
        problems because the loop gives up after 21 attempts.
    """
    sections = []
    attempts = 0
    while True:
        try:
            link = gen_link()
            print(link)
            problem_html = get_problem(link)
            # get_problem returns None when the page has no problem section.
            if problem_html is not None:
                anchor = f'<a href="{link}" target="_blank">to link</a>'
                sections.append(anchor + problem_html)
        except Exception as e:
            # Report the failure instead of silently discarding it; a bare
            # ``except`` would also swallow KeyboardInterrupt.
            print(f"Error: {e}")
        attempts += 1
        if len(sections) >= num or attempts > 20:
            break

    body = "".join(sections)
    return f'''
    <html>
    <head>
    </head>
    <body>
    {body}
    </body>
    </html>
    '''