Spaces:

ymcmy
/

physics_pdf

Runtime error

App Files Files Community

ymcmy committed on Jul 25, 2024

Commit

54533ed

verified ·

1 Parent(s): 3b881b3

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -35

app.py CHANGED Viewed

@@ -2,31 +2,13 @@ import gradio as gr
 from bs4 import BeautifulSoup
 import requests
 from jinja2 import Template
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options
-from selenium.webdriver.chrome.service import Service
-from webdriver_manager.chrome import ChromeDriverManager
 from weasyprint import HTML
 import base64
 from urllib.parse import urljoin
 import time
-#from langchain.agents.agent_toolkits import create_python_agent
-#from langchain.agents import load_tools, initialize_agent
-#from langchain.agents import AgentType
-#from langchain.tools.python.tool import PythonREPLTool
-#from langchain.python import PythonREPL
-#from langchain.chat_models import ChatOpenAI
-#from langchain.prompts import ChatPromptTemplate
-#from langchain.chains import LLMChain
 import os
-from dotenv import load_dotenv, find_dotenv
-_ = load_dotenv(find_dotenv()) # read local .env file
 import warnings
 warnings.filterwarnings("ignore")
-#from webdriver_manager.chrome import ChromeDriverManager
 phy2_conceptual_link="https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-conceptual-questions"
 phy2_problem_link="https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-problems-exercises"
@@ -48,32 +30,25 @@ def get_html(url):
         print(f"An error occurred when getting html: {e}")
         return None
-# Finalize the function to extract the HTML of a question along with adjacent tables, images, or hyperlinks
-def get_question(exercises, question_index, img_base_url="https://openstax.org",href_base_url="https://openstax.org/books/college-physics-ap-courses-2e/pages/"):
     question_index = question_index - 1
-    # Find all divs with 'data-type' attribute set to 'exercise'
     if question_index >= len(exercises):
         return "Question index out of range."
-    # Get the exercise div
     exercise_div = exercises[question_index]
     print(exercise_div)
-    # Convert img src to absolute URLs
     for img_tag in exercise_div.find_all('img'):
         img_tag['src'] = urljoin(img_base_url, img_tag['data-lazy-src'])
         #print("src changed")
-    # Convert hyperlinks to absolute URLs
     for a_tag in exercise_div.find_all('a'):
         a_tag['href'] = urljoin(href_base_url, a_tag['href'])
-        print("href changed")
     return str(exercise_div)
 def get_all_questions(unit_num,conceptual_list,problem_list,conceptual_url,problem_url):
     conceptual_html=get_html(conceptual_url)
     problem_html=get_html(problem_url)
@@ -90,7 +65,6 @@ def get_all_questions(unit_num,conceptual_list,problem_list,conceptual_url,probl
     return questions
 from jinja2 import Template
 def generate_html(chapter_num, conceptual_input, problem_input,path):
@@ -101,7 +75,6 @@ def generate_html(chapter_num, conceptual_input, problem_input,path):
     problem_url=phy2_problem_link.format(int(chapter_num))
     questions=get_all_questions(int(chapter_num),conceptual_list,problem_list,conceptual_url,problem_url)
-    # Create a Jinja2 template for the HTML content
     template_str = '''
     <!DOCTYPE html>
     <html>
@@ -113,7 +86,7 @@ def generate_html(chapter_num, conceptual_input, problem_input,path):
         <style>
             body {
                 font-family: 'Calibri', sans-serif;
-                font-size: 25px;
             }
             .page-break {
                 page-break-after: always;
@@ -146,7 +119,6 @@ def generate_html(chapter_num, conceptual_input, problem_input,path):
     '''
     template = Template(template_str)
-    # Render the template with the chapter number, problem lists, and questions
     rendered_html = template.render(chapter_number=int(chapter_num),
                                     conceptual_problem_list=conceptual_list,
                                     problems_and_exercise_list=problem_list,
@@ -154,7 +126,6 @@ def generate_html(chapter_num, conceptual_input, problem_input,path):
                                     conceptual_url=conceptual_url,
                                     problem_url=problem_url)
-    # Save the rendered HTML to a file
     with open(path, 'w', encoding='utf-8') as f:
         f.write(rendered_html)
@@ -166,6 +137,10 @@ def main_function(unit_num, conceptual_input, problem_input):
     if not conceptual_input and not problem_input:
         return "Both lists cannot be empty. Please provide at least one."
     abs_path = "D:\\projects\\phy_pdf" + "\\questions_" + str(int(unit_num))
     html_path = abs_path + ".html"
     generate_html(unit_num, conceptual_input, problem_input, html_path)

 from bs4 import BeautifulSoup
 import requests
 from jinja2 import Template
 from weasyprint import HTML
 import base64
 from urllib.parse import urljoin
 import time
 import os
 import warnings
 warnings.filterwarnings("ignore")
 phy2_conceptual_link="https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-conceptual-questions"
 phy2_problem_link="https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-problems-exercises"
         print(f"An error occurred when getting html: {e}")
         return None
def get_question(exercises, question_index, img_base_url=img_base_url, href_base_url=href_base_url):
    """Return the HTML of one exercise with its image and link URLs made absolute.

    Args:
        exercises: Indexable collection of parsed exercise elements. Each
            element must provide ``find_all`` and tag-style attribute access
            (presumably BeautifulSoup tags -- confirm against the caller).
        question_index: 1-based position of the wanted exercise.
        img_base_url: Base URL that image sources are resolved against.
        href_base_url: Base URL that hyperlink targets are resolved against.

    Returns:
        ``str(exercise)`` for the selected element, or the literal message
        ``"Question index out of range."`` when ``question_index`` does not
        address an element of ``exercises``.

    Note:
        The selected element is modified in place: its ``img`` ``src`` and
        ``a`` ``href`` attributes are overwritten with absolute URLs.
    """
    position = question_index - 1
    # Reject indexes below 1 too: a negative position would otherwise wrap
    # around and silently return an exercise counted from the end.
    if not 0 <= position < len(exercises):
        return "Question index out of range."

    exercise_div = exercises[position]

    for img_tag in exercise_div.find_all('img'):
        # Prefer the lazy-load attribute; fall back to an existing src so an
        # image without 'data-lazy-src' no longer raises KeyError.
        relative_src = img_tag.get('data-lazy-src') or img_tag.get('src')
        if relative_src:
            img_tag['src'] = urljoin(img_base_url, relative_src)

    for a_tag in exercise_div.find_all('a'):
        # Anchors without an href have nothing to resolve; skip them instead
        # of raising KeyError.
        target = a_tag.get('href')
        if target:
            a_tag['href'] = urljoin(href_base_url, target)

    return str(exercise_div)
 def get_all_questions(unit_num,conceptual_list,problem_list,conceptual_url,problem_url):
     conceptual_html=get_html(conceptual_url)
     problem_html=get_html(problem_url)
     return questions
 from jinja2 import Template
 def generate_html(chapter_num, conceptual_input, problem_input,path):
     problem_url=phy2_problem_link.format(int(chapter_num))
     questions=get_all_questions(int(chapter_num),conceptual_list,problem_list,conceptual_url,problem_url)
     template_str = '''
     <!DOCTYPE html>
     <html>
         <style>
             body {
                 font-family: 'Calibri', sans-serif;
+                font-size: 20px;
             }
             .page-break {
                 page-break-after: always;
     '''
     template = Template(template_str)
     rendered_html = template.render(chapter_number=int(chapter_num),
                                     conceptual_problem_list=conceptual_list,
                                     problems_and_exercise_list=problem_list,
                                     conceptual_url=conceptual_url,
                                     problem_url=problem_url)
     with open(path, 'w', encoding='utf-8') as f:
         f.write(rendered_html)
     if not conceptual_input and not problem_input:
         return "Both lists cannot be empty. Please provide at least one."
+    for file_name in os.listdir(abs_path):
+        if file_name.endswith(".pdf"):
+            os.remove(os.path.join(abs_path, file_name))
     abs_path = "D:\\projects\\phy_pdf" + "\\questions_" + str(int(unit_num))
     html_path = abs_path + ".html"
     generate_html(unit_num, conceptual_input, problem_input, html_path)