ymcmy committed on
Commit
c31fddb
·
verified ·
1 Parent(s): 5d37f40

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +55 -41
utils.py CHANGED
@@ -1,35 +1,32 @@
1
  from bs4 import BeautifulSoup
2
  import requests
3
  import numpy as np
4
- import pdfkit
5
  from datetime import datetime
6
 
7
def _sample_question(rng, mu, sigma, low, high):
    """Draw one question number from a rounded normal restricted to [low, high].

    Values outside the range are rejected and redrawn, so the result is always
    an in-range ``int`` and the function never fails on an empty sample.
    """
    while True:
        candidate = int(np.round(rng.normal(mu, sigma)))
        if low <= candidate <= high:
            return candidate


def gen_link():
    """Return the AoPS wiki URL of a randomly chosen AMC 12 or AIME problem.

    The contest family is picked with equal probability:

    * AMC 12: year 2015-2022, paper A or B, question 10-25 (normal around 18).
    * AIME: year 2005-2022, paper I or II, question 1-15 (normal around 6).

    Returns:
        str: URL of the form
        ``https://artofproblemsolving.com/wiki/index.php/<contest>_Problems/Problem_<q>``.
    """
    # A private, entropy-seeded generator gives fresh randomness on every call
    # without re-seeding (and thereby clobbering) NumPy's global RNG state.
    rng = np.random.default_rng()
    base = 'https://artofproblemsolving.com/wiki/index.php'

    if rng.choice([True, False]):
        # AMC 12
        year = rng.integers(2015, 2023)  # upper bound is exclusive
        paper = rng.choice(['A', 'B'])
        q = _sample_question(rng, 18, 5, 10, 25)
        return f'{base}/{year}_AMC_12{paper}_Problems/Problem_{q}'

    # AIME
    year = rng.integers(2005, 2023)  # upper bound is exclusive
    paper = rng.choice(['I', 'II'])
    q = _sample_question(rng, 6, 4, 1, 15)
    return f'{base}/{year}_AIME_{paper}_Problems/Problem_{q}'
34
 
35
  def convert_to_renderable_html(text):
@@ -38,54 +35,71 @@ def convert_to_renderable_html(text):
38
 
39
def get_problem(url):
    """Fetch a wiki problem page and return its problem statement as HTML.

    The page is searched for the ``<span class="mw-headline" id="Problem">``
    headline. Every ``<p>`` element that follows the headline's parent is
    collected until the next ``<h2>`` (the start of the following section),
    each one passed through ``convert_to_renderable_html``.

    Args:
        url: Address of the problem page to download.

    Returns:
        str | None: The collected paragraphs joined by single spaces, or
        ``None`` (after printing "No problem found") when the page has no
        ``Problem`` headline.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    # The timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')
    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})

    if problem_headline is None:
        print("No problem found")
        return None

    problem_content = []
    for sibling in problem_headline.parent.find_next_siblings():
        # An <h2> marks the next section, so the problem text has ended.
        if sibling.name == 'h2':
            break
        if sibling.name == 'p':
            problem_content.append(convert_to_renderable_html(str(sibling)))

    return " ".join(problem_content)
62
 
63
def gen_html(num, max_tries=21):
    """Build an HTML page containing up to ``num`` randomly chosen problems.

    Each attempt picks a link with ``gen_link`` and downloads it with
    ``get_problem``. Attempts that raise, or that find no problem on the page,
    are skipped. A successful attempt contributes a "to link" anchor followed
    by the problem HTML.

    Args:
        num: Number of problems wanted. Zero or less yields an empty page.
        max_tries: Upper bound on download attempts, so a run of failures
            cannot loop forever.

    Returns:
        str: A complete ``<html>`` document wrapping the collected problems.
    """
    sections = []
    tries = 0
    while len(sections) < num and tries < max_tries:
        tries += 1
        try:
            link = gen_link()
            print(link)
            qhtml = get_problem(link)
        except Exception as e:
            # Best effort: one failed download must not abort the whole page.
            # Catching Exception (not a bare except) lets Ctrl-C still work.
            print(f"Error: {e}")
            continue
        if qhtml is None:
            # get_problem found no problem section on this page; try another.
            continue
        hype = '<a href="{}" target="_blank">to link</a>'.format(link)
        sections.append(hype + qhtml)

    all_q = "".join(sections)
    return f'''
<html>
<head>
</head>
<body>
{all_q}
</body>
</html>
'''
 
 
 
 
 
1
  from bs4 import BeautifulSoup
2
  import requests
3
  import numpy as np
 
4
  from datetime import datetime
5
 
6
def _sample_question(rng, mu, sigma, low, high):
    """Draw one question number from a rounded normal restricted to [low, high].

    Values outside the range are rejected and redrawn, so the result is always
    an in-range ``int`` and the function never fails on an empty sample.
    """
    while True:
        candidate = int(np.round(rng.normal(mu, sigma)))
        if low <= candidate <= high:
            return candidate


def gen_link():
    """Return the AoPS wiki URL of a randomly chosen AMC 12 or AIME problem.

    The contest family is picked with equal probability:

    * AMC 12: year 2015-2022, paper A or B, question 10-25 (normal around 18).
    * AIME: year 2005-2022, paper I or II, question 1-15 (normal around 6).

    Returns:
        str: URL of the form
        ``https://artofproblemsolving.com/wiki/index.php/<contest>_Problems/Problem_<q>``.
    """
    # A private, entropy-seeded generator gives fresh randomness on every call
    # without re-seeding (and thereby clobbering) NumPy's global RNG state.
    rng = np.random.default_rng()
    base = 'https://artofproblemsolving.com/wiki/index.php'

    if rng.choice([True, False]):
        # AMC 12
        year = rng.integers(2015, 2023)  # upper bound is exclusive
        paper = rng.choice(['A', 'B'])
        q = _sample_question(rng, 18, 5, 10, 25)
        return f'{base}/{year}_AMC_12{paper}_Problems/Problem_{q}'

    # AIME
    year = rng.integers(2005, 2023)  # upper bound is exclusive
    paper = rng.choice(['I', 'II'])
    q = _sample_question(rng, 6, 4, 1, 15)
    return f'{base}/{year}_AIME_{paper}_Problems/Problem_{q}'
31
 
32
  def convert_to_renderable_html(text):
 
35
 
36
def get_problem(url):
    """Fetch a wiki problem page and return its problem statement as HTML.

    The page is searched for the ``<span class="mw-headline" id="Problem">``
    headline. Every ``<p>`` element that follows the headline's parent is
    collected until the next ``<h2>`` (the start of the following section),
    each one passed through ``convert_to_renderable_html``.

    Args:
        url: Address of the problem page to download.

    Returns:
        str | None: The collected paragraphs joined by single spaces, or
        ``None`` (after printing "No problem found") when the page has no
        ``Problem`` headline.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    # The timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')
    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})

    if problem_headline is None:
        print("No problem found")
        return None

    problem_content = []
    for sibling in problem_headline.parent.find_next_siblings():
        # An <h2> marks the next section, so the problem text has ended.
        if sibling.name == 'h2':
            break
        if sibling.name == 'p':
            problem_content.append(convert_to_renderable_html(str(sibling)))

    return " ".join(problem_content)
56
 
57
def gen_html(num, max_tries=21):
    """Build a styled HTML page containing up to ``num`` random problems.

    Each attempt picks a link with ``gen_link`` and downloads it with
    ``get_problem``. Attempts that raise, or that find no problem on the page,
    are skipped. A successful attempt contributes a "to link" anchor followed
    by the problem HTML.

    Args:
        num: Number of problems wanted. Zero or less yields an empty page.
        max_tries: Upper bound on download attempts, so a run of failures
            cannot loop forever.

    Returns:
        str: A complete ``<html>`` document (with an inline ``<style>`` block)
        wrapping the collected problems.
    """
    sections = []
    tries = 0
    while len(sections) < num and tries < max_tries:
        tries += 1
        try:
            link = gen_link()
            print(link)
            qhtml = get_problem(link)
        except Exception as e:
            # Best effort: one failed download must not abort the whole page.
            print(f"Error: {e}")
            continue
        if qhtml is None:
            # get_problem found no problem section on this page; try another.
            continue
        hype = f'<a href="{link}" target="_blank">to link</a>'
        sections.append(hype + qhtml)

    all_q = "".join(sections)
    # Literal CSS braces are doubled because the template is an f-string.
    return f'''
<html>
<head>
<style>
body {{
    font-family: Arial, sans-serif;
    font-size: 12pt;
}}
img.latex {{
    font-size: 12pt; /* Ensure math font size matches the body text size */
}}
a {{
    color: blue;
    text-decoration: none;
}}
a:hover {{
    text-decoration: underline;
}}
</style>
</head>
<body>
{all_q}
</body>
</html>
'''
102
+
103
def save_html_to_file(html_content, output_filename):
    """Write ``html_content`` to ``output_filename`` as UTF-8 text.

    The file is opened in write mode, so an existing file at that path is
    overwritten.
    """
    with open(output_filename, mode='w', encoding='utf-8') as out:
        out.write(html_content)