ymcmy committed on
Commit
54533ed
·
verified ·
1 Parent(s): 3b881b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -35
app.py CHANGED
@@ -2,31 +2,13 @@ import gradio as gr
2
  from bs4 import BeautifulSoup
3
  import requests
4
  from jinja2 import Template
5
- from selenium import webdriver
6
- from selenium.webdriver.chrome.options import Options
7
-
8
- from selenium.webdriver.chrome.service import Service
9
- from webdriver_manager.chrome import ChromeDriverManager
10
-
11
  from weasyprint import HTML
12
-
13
  import base64
14
  from urllib.parse import urljoin
15
  import time
16
- #from langchain.agents.agent_toolkits import create_python_agent
17
- #from langchain.agents import load_tools, initialize_agent
18
- #from langchain.agents import AgentType
19
- #from langchain.tools.python.tool import PythonREPLTool
20
- #from langchain.python import PythonREPL
21
- #from langchain.chat_models import ChatOpenAI
22
- #from langchain.prompts import ChatPromptTemplate
23
- #from langchain.chains import LLMChain
24
  import os
25
- from dotenv import load_dotenv, find_dotenv
26
- _ = load_dotenv(find_dotenv()) # read local .env file
27
  import warnings
28
  warnings.filterwarnings("ignore")
29
- #from webdriver_manager.chrome import ChromeDriverManager
30
 
31
  phy2_conceptual_link="https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-conceptual-questions"
32
  phy2_problem_link="https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-problems-exercises"
@@ -48,32 +30,25 @@ def get_html(url):
48
  print(f"An error occurred when getting html: {e}")
49
  return None
50
 
51
- # Finalize the function to extract the HTML of a question along with adjacent tables, images, or hyperlinks
52
- def get_question(exercises, question_index, img_base_url="https://openstax.org",href_base_url="https://openstax.org/books/college-physics-ap-courses-2e/pages/"):
53
  question_index = question_index - 1
54
 
55
- # Find all divs with 'data-type' attribute set to 'exercise'
56
-
57
  if question_index >= len(exercises):
58
  return "Question index out of range."
59
-
60
- # Get the exercise div
61
  exercise_div = exercises[question_index]
62
  print(exercise_div)
63
-
64
- # Convert img src to absolute URLs
65
  for img_tag in exercise_div.find_all('img'):
66
  img_tag['src'] = urljoin(img_base_url, img_tag['data-lazy-src'])
67
  #print("src changed")
68
-
69
- # Convert hyperlinks to absolute URLs
70
  for a_tag in exercise_div.find_all('a'):
71
  a_tag['href'] = urljoin(href_base_url, a_tag['href'])
72
- print("href changed")
73
 
74
  return str(exercise_div)
75
 
76
-
77
  def get_all_questions(unit_num,conceptual_list,problem_list,conceptual_url,problem_url):
78
  conceptual_html=get_html(conceptual_url)
79
  problem_html=get_html(problem_url)
@@ -90,7 +65,6 @@ def get_all_questions(unit_num,conceptual_list,problem_list,conceptual_url,probl
90
 
91
  return questions
92
 
93
-
94
  from jinja2 import Template
95
 
96
  def generate_html(chapter_num, conceptual_input, problem_input,path):
@@ -101,7 +75,6 @@ def generate_html(chapter_num, conceptual_input, problem_input,path):
101
  problem_url=phy2_problem_link.format(int(chapter_num))
102
 
103
  questions=get_all_questions(int(chapter_num),conceptual_list,problem_list,conceptual_url,problem_url)
104
- # Create a Jinja2 template for the HTML content
105
  template_str = '''
106
  <!DOCTYPE html>
107
  <html>
@@ -113,7 +86,7 @@ def generate_html(chapter_num, conceptual_input, problem_input,path):
113
  <style>
114
  body {
115
  font-family: 'Calibri', sans-serif;
116
- font-size: 25px;
117
  }
118
  .page-break {
119
  page-break-after: always;
@@ -146,7 +119,6 @@ def generate_html(chapter_num, conceptual_input, problem_input,path):
146
  '''
147
  template = Template(template_str)
148
 
149
- # Render the template with the chapter number, problem lists, and questions
150
  rendered_html = template.render(chapter_number=int(chapter_num),
151
  conceptual_problem_list=conceptual_list,
152
  problems_and_exercise_list=problem_list,
@@ -154,7 +126,6 @@ def generate_html(chapter_num, conceptual_input, problem_input,path):
154
  conceptual_url=conceptual_url,
155
  problem_url=problem_url)
156
 
157
- # Save the rendered HTML to a file
158
  with open(path, 'w', encoding='utf-8') as f:
159
  f.write(rendered_html)
160
 
@@ -166,6 +137,10 @@ def main_function(unit_num, conceptual_input, problem_input):
166
  if not conceptual_input and not problem_input:
167
  return "Both lists cannot be empty. Please provide at least one."
168
 
 
 
 
 
169
  abs_path = "D:\\projects\\phy_pdf" + "\\questions_" + str(int(unit_num))
170
  html_path = abs_path + ".html"
171
  generate_html(unit_num, conceptual_input, problem_input, html_path)
 
2
  from bs4 import BeautifulSoup
3
  import requests
4
  from jinja2 import Template
 
 
 
 
 
 
5
  from weasyprint import HTML
 
6
  import base64
7
  from urllib.parse import urljoin
8
  import time
 
 
 
 
 
 
 
 
9
  import os
 
 
10
  import warnings
11
  warnings.filterwarnings("ignore")
 
12
 
13
  phy2_conceptual_link="https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-conceptual-questions"
14
  phy2_problem_link="https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-problems-exercises"
 
30
  print(f"An error occurred when getting html: {e}")
31
  return None
32
 
33
+ def get_question(exercises, question_index, img_base_url=img_base_url,href_base_url=href_base_url):
 
34
  question_index = question_index - 1
35
 
 
 
36
  if question_index >= len(exercises):
37
  return "Question index out of range."
38
+
 
39
  exercise_div = exercises[question_index]
40
  print(exercise_div)
41
+
 
42
  for img_tag in exercise_div.find_all('img'):
43
  img_tag['src'] = urljoin(img_base_url, img_tag['data-lazy-src'])
44
  #print("src changed")
45
+
 
46
  for a_tag in exercise_div.find_all('a'):
47
  a_tag['href'] = urljoin(href_base_url, a_tag['href'])
48
+ #print("href changed")
49
 
50
  return str(exercise_div)
51
 
 
52
  def get_all_questions(unit_num,conceptual_list,problem_list,conceptual_url,problem_url):
53
  conceptual_html=get_html(conceptual_url)
54
  problem_html=get_html(problem_url)
 
65
 
66
  return questions
67
 
 
68
  from jinja2 import Template
69
 
70
  def generate_html(chapter_num, conceptual_input, problem_input,path):
 
75
  problem_url=phy2_problem_link.format(int(chapter_num))
76
 
77
  questions=get_all_questions(int(chapter_num),conceptual_list,problem_list,conceptual_url,problem_url)
 
78
  template_str = '''
79
  <!DOCTYPE html>
80
  <html>
 
86
  <style>
87
  body {
88
  font-family: 'Calibri', sans-serif;
89
+ font-size: 20px;
90
  }
91
  .page-break {
92
  page-break-after: always;
 
119
  '''
120
  template = Template(template_str)
121
 
 
122
  rendered_html = template.render(chapter_number=int(chapter_num),
123
  conceptual_problem_list=conceptual_list,
124
  problems_and_exercise_list=problem_list,
 
126
  conceptual_url=conceptual_url,
127
  problem_url=problem_url)
128
 
 
129
  with open(path, 'w', encoding='utf-8') as f:
130
  f.write(rendered_html)
131
 
 
137
  if not conceptual_input and not problem_input:
138
  return "Both lists cannot be empty. Please provide at least one."
139
 
140
+ for file_name in os.listdir(abs_path):
141
+ if file_name.endswith(".pdf"):
142
+ os.remove(os.path.join(abs_path, file_name))
143
+
144
  abs_path = "D:\\projects\\phy_pdf" + "\\questions_" + str(int(unit_num))
145
  html_path = abs_path + ".html"
146
  generate_html(unit_num, conceptual_input, problem_input, html_path)