ymcmy committed on
Commit
37e2932
·
verified ·
1 Parent(s): a4f4309

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -38
app.py CHANGED
@@ -3,17 +3,20 @@ from bs4 import BeautifulSoup
3
  import requests
4
  from jinja2 import Template
5
  from weasyprint import HTML
6
- import base64
7
  from urllib.parse import urljoin
8
- import time
9
  import os
10
  import warnings
 
 
11
  warnings.filterwarnings("ignore")
12
 
13
- phy2_conceptual_link="https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-conceptual-questions"
14
- phy2_problem_link="https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-problems-exercises"
15
- img_base_url='https://openstax.org'
16
- href_base_url='https://openstax.org/books/college-physics-ap-courses-2e/pages/'
 
 
 
17
 
18
  def get_html(url):
19
  headers = {
@@ -24,49 +27,48 @@ def get_html(url):
24
  response.encoding = 'utf-8'
25
  response.raise_for_status() # Raise an HTTPError for bad responses
26
  soup = BeautifulSoup(response.text, 'html.parser')
27
- #print(soup)
28
  return soup
29
  except requests.RequestException as e:
30
- print(f"An error occurred when getting html: {e}")
31
  return None
32
 
33
- def get_question(exercises, question_index, img_base_url=img_base_url,href_base_url=href_base_url):
34
  question_index = question_index - 1
35
 
36
  if question_index >= len(exercises):
37
  return "Question index out of range."
38
 
39
  exercise_div = exercises[question_index]
40
- print(exercise_div)
41
 
42
  for img_tag in exercise_div.find_all('img'):
43
  img_tag['src'] = urljoin(img_base_url, img_tag['data-lazy-src'])
44
- #print("src changed")
45
 
46
  for a_tag in exercise_div.find_all('a'):
47
  a_tag['href'] = urljoin(href_base_url, a_tag['href'])
48
- #print("href changed")
49
 
50
  return str(exercise_div)
51
 
52
- def get_all_questions(unit_num,conceptual_list,problem_list,conceptual_url,problem_url):
53
- conceptual_html=get_html(conceptual_url)
54
- problem_html=get_html(problem_url)
 
 
 
 
55
 
56
  conceptual_exercises = conceptual_html.find_all('div', {'data-type': 'exercise'})
57
  problem_exercises = problem_html.find_all('div', {'data-type': 'exercise'})
58
- questions=[]
59
 
60
  for i in conceptual_list:
61
- questions.append(get_question(conceptual_exercises,i,img_base_url,href_base_url))
62
 
63
  for i in problem_list:
64
- questions.append(get_question(problem_exercises,i,img_base_url,href_base_url))
65
 
66
  return questions
67
 
68
- from jinja2 import Template
69
-
70
  def generate_html(chapter_num, conceptual_input, problem_input, path):
71
  conceptual_list = list(map(int, conceptual_input.split(",")) if conceptual_input else [])
72
  problem_list = list(map(int, problem_input.split(",")) if problem_input else [])
@@ -130,27 +132,39 @@ def generate_html(chapter_num, conceptual_input, problem_input, path):
130
  with open(path, 'w', encoding='utf-8') as f:
131
  f.write(rendered_html)
132
 
133
-
134
  def generate_pdf(input_path, output_path):
135
- HTML(input_path).write_pdf(output_path)
 
 
 
136
 
137
  def main_function(unit_num, conceptual_input, problem_input):
138
- if not conceptual_input and not problem_input:
139
- return "Both lists cannot be empty. Please provide at least one."
140
-
141
- for file_name in os.listdir( "./" ):
142
- if file_name.endswith(".pdf"):
143
- os.remove(os.path.join("./",file_name))
144
-
145
- abs_path = "D:\\projects\\phy_pdf" + "\\questions_" + str(int(unit_num))
146
- html_path = abs_path + ".html"
147
- generate_html(unit_num, conceptual_input, problem_input, html_path)
148
- pdf_path = abs_path + ".pdf"
149
- generate_pdf(html_path, pdf_path)
150
- print("pdf generated")
151
- print(pdf_path)
152
- return pdf_path
153
-
 
 
 
 
 
 
 
 
 
 
154
 
155
  iface = gr.Interface(
156
  fn=main_function,
 
3
  import requests
4
  from jinja2 import Template
5
  from weasyprint import HTML
 
6
  from urllib.parse import urljoin
 
7
  import os
8
  import warnings
9
+ import logging
10
+
11
  warnings.filterwarnings("ignore")
12
 
13
+ # Set up logging
14
+ logging.basicConfig(level=logging.DEBUG, filename='app.log', filemode='w', format='%(name)s - %(levelname)s - %(message)s')
15
+
16
+ phy2_conceptual_link = "https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-conceptual-questions"
17
+ phy2_problem_link = "https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-problems-exercises"
18
+ img_base_url = 'https://openstax.org'
19
+ href_base_url = 'https://openstax.org/books/college-physics-ap-courses-2e/pages/'
20
 
21
  def get_html(url):
22
  headers = {
 
27
  response.encoding = 'utf-8'
28
  response.raise_for_status() # Raise an HTTPError for bad responses
29
  soup = BeautifulSoup(response.text, 'html.parser')
 
30
  return soup
31
  except requests.RequestException as e:
32
+ logging.error(f"An error occurred when getting HTML: {e}")
33
  return None
34
 
35
def get_question(exercises, question_index, img_base_url=img_base_url, href_base_url=href_base_url):
    """Return the HTML of one exercise with image and link URLs made absolute.

    Args:
        exercises: Sequence of parsed exercise elements (the result of a
            ``find_all`` call on the page soup).
        question_index: 1-based number of the exercise to extract.
        img_base_url: Base URL that image sources are resolved against.
        href_base_url: Base URL that anchor targets are resolved against.

    Returns:
        The exercise element serialized with ``str()``, or the string
        ``"Question index out of range."`` when ``question_index`` does not
        select an element of ``exercises``.
    """
    position = question_index - 1

    # Reject 0 and negative numbers as well as too-large ones: a negative
    # position would otherwise wrap around and silently return an exercise
    # counted from the end of the list.
    if position < 0 or position >= len(exercises):
        return "Question index out of range."

    exercise_div = exercises[position]
    logging.debug(f"Processing exercise: {exercise_div}")

    for img_tag in exercise_div.find_all('img'):
        # Prefer the lazy-load attribute; fall back to an existing src so an
        # image without 'data-lazy-src' no longer raises KeyError.
        source = img_tag.get('data-lazy-src') or img_tag.get('src')
        if source:
            img_tag['src'] = urljoin(img_base_url, source)

    for a_tag in exercise_div.find_all('a'):
        # Anchors without an href (e.g. named anchors) are left untouched.
        target = a_tag.get('href')
        if target is not None:
            a_tag['href'] = urljoin(href_base_url, target)

    return str(exercise_div)
51
 
52
def get_all_questions(unit_num, conceptual_list, problem_list, conceptual_url, problem_url):
    """Collect the requested conceptual and problem exercises as HTML strings.

    Only pages that actually have requested question numbers are fetched, so
    a failing page that nothing was requested from cannot discard the result.

    Args:
        unit_num: Not used by this function; kept for interface compatibility.
        conceptual_list: 1-based question numbers to take from the
            conceptual-questions page.
        problem_list: 1-based question numbers to take from the
            problems page.
        conceptual_url: URL of the conceptual-questions page.
        problem_url: URL of the problems page.

    Returns:
        A list of HTML strings, conceptual questions first, then problems.
        An empty list is returned when a page that is needed could not be
        fetched.
    """
    questions = []

    for page_url, wanted in ((conceptual_url, conceptual_list), (problem_url, problem_list)):
        if not wanted:
            # Nothing requested from this page: skip the network round trip.
            continue

        page = get_html(page_url)
        if page is None:
            # Keep the all-or-nothing contract when a needed page fails.
            return []

        exercises = page.find_all('div', {'data-type': 'exercise'})
        questions.extend(
            get_question(exercises, number, img_base_url, href_base_url)
            for number in wanted
        )

    return questions
71
 
 
 
72
  def generate_html(chapter_num, conceptual_input, problem_input, path):
73
  conceptual_list = list(map(int, conceptual_input.split(",")) if conceptual_input else [])
74
  problem_list = list(map(int, problem_input.split(",")) if problem_input else [])
 
132
  with open(path, 'w', encoding='utf-8') as f:
133
  f.write(rendered_html)
134
 
 
135
def generate_pdf(input_path, output_path):
    """Render the HTML file at ``input_path`` to a PDF at ``output_path``.

    Failures are logged (with traceback) instead of being raised, as before,
    but the outcome is now reported to the caller.

    Args:
        input_path: Path of the HTML file to render.
        output_path: Path the PDF is written to.

    Returns:
        True when the PDF was written, False when rendering failed.
    """
    try:
        HTML(input_path).write_pdf(output_path)
    except Exception as e:
        # logging.exception records the traceback, which logging.error dropped.
        logging.exception(f"An error occurred when generating PDF: {e}")
        return False
    return True
140
 
141
def main_function(unit_num, conceptual_input, problem_input, output_dir=None):
    """Build the questions PDF for a unit and return its path.

    Args:
        unit_num: Unit number; converted with ``int()`` to name the output files.
        conceptual_input: Comma-separated conceptual question numbers, or an
            empty value for none.
        problem_input: Comma-separated problem numbers, or an empty value
            for none.
        output_dir: Directory for the generated HTML and PDF. Defaults to the
            previously hard-coded ``D:\\projects\\phy_pdf``.

    Returns:
        The path of the generated PDF on success; otherwise a human-readable
        message string (both inputs empty, or any failure along the way).
    """
    try:
        # Treat None and whitespace-only text as "nothing requested" so the
        # user gets the specific message instead of a generic parse error.
        conceptual_input = (conceptual_input or "").strip()
        problem_input = (problem_input or "").strip()
        if not conceptual_input and not problem_input:
            return "Both lists cannot be empty. Please provide at least one."

        if output_dir is None:
            output_dir = "D:\\projects\\phy_pdf"

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_dir, exist_ok=True)

        # Delete all existing PDFs in the directory
        for existing in os.listdir(output_dir):
            if existing.endswith(".pdf"):
                os.remove(os.path.join(output_dir, existing))

        base_name = "questions_" + str(int(unit_num))
        html_path = os.path.join(output_dir, base_name + ".html")
        pdf_path = os.path.join(output_dir, base_name + ".pdf")

        generate_html(unit_num, conceptual_input, problem_input, html_path)
        generate_pdf(html_path, pdf_path)

        # generate_pdf logs rather than raises on failure. Old PDFs were
        # removed above, so a missing file means this run produced nothing.
        if not os.path.isfile(pdf_path):
            logging.error("PDF generation did not produce a file at %s", pdf_path)
            return "An error occurred. Please check the logs for more details."

        logging.info("PDF generated successfully")
        return pdf_path
    except Exception as e:
        logging.exception(f"An error occurred in main_function: {e}")
        return "An error occurred. Please check the logs for more details."
168
 
169
  iface = gr.Interface(
170
  fn=main_function,