Spaces:
Runtime error
Runtime error
File size: 7,154 Bytes
271fcb7 37e2932 271fcb7 37e2932 cb02641 37e2932 271fcb7 37e2932 271fcb7 d943111 271fcb7 54533ed 271fcb7 37e2932 54533ed 271fcb7 54533ed 271fcb7 cb02641 37e2932 271fcb7 37e2932 271fcb7 cb02641 271fcb7 cb02641 271fcb7 cb02641 271fcb7 cb02641 271fcb7 a4f4309 271fcb7 54533ed 271fcb7 a4f4309 271fcb7 a4f4309 271fcb7 a4f4309 271fcb7 cb02641 271fcb7 cb02641 37e2932 ffe110a 37e2932 ffe110a 37e2932 cb02641 37e2932 ffe110a 37e2932 271fcb7 cb02641 271fcb7 ffe110a 271fcb7 cb02641 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
import gradio as gr
from bs4 import BeautifulSoup
import requests
from jinja2 import Template
from urllib.parse import urljoin
import os
import warnings
import logging
# Install a blanket "ignore" filter so no warning of any category is shown.
warnings.filterwarnings(action="ignore")

# Route DEBUG-and-above records to app.log; filemode 'w' truncates the file
# each time the process starts.
logging.basicConfig(
    filename='app.log',
    filemode='w',
    format='%(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
# Site root that image paths found in exercises are resolved against.
img_base_url = 'https://openstax.org'

# URL templates per supported book, keyed by the title shown in the UI.
# In "conceptual_link" and "problem_link" the "{}" placeholder is filled with
# the chapter number; "href_base_url" is the base that in-page links are
# resolved against. Each row is (title, pages URL, problem-page suffix).
books = {
    title: {
        "conceptual_link": f"{pages_url}{{}}-conceptual-questions",
        "problem_link": f"{pages_url}{{}}-{problem_page}",
        "href_base_url": pages_url,
    }
    for title, pages_url, problem_page in (
        (
            "College Physics AP",
            "https://openstax.org/books/college-physics-ap-courses-2e/pages/",
            "problems-exercises",
        ),
        (
            "University Physics Vol. 1",
            "https://openstax.org/books/university-physics-volume-1/pages/",
            "problems",
        ),
        (
            "University Physics Vol. 2",
            "https://openstax.org/books/university-physics-volume-2/pages/",
            "problems",
        ),
    )
}
def get_html(url, timeout=30):
    """Download *url* and return it parsed as a BeautifulSoup document.

    Args:
        url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The parsed ``BeautifulSoup`` tree, or ``None`` when the request fails
        (connection problem, timeout, or an HTTP error status). Failures are
        logged rather than raised.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses
    except requests.RequestException as e:
        logging.error(f"An error occurred when getting HTML: {e}")
        return None
    # Force UTF-8 so the body is not decoded with a guessed charset.
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'html.parser')
def get_question(exercises, question_index, img_base_url, href_base_url):
    """Return the markup of one exercise with image and link URLs made absolute.

    Args:
        exercises: Sequence of parsed exercise elements; each must provide
            ``find_all`` and convert to its markup via ``str()``.
        question_index: 1-based position of the wanted exercise.
        img_base_url: Base URL that image sources are resolved against.
        href_base_url: Base URL that anchor ``href`` values are resolved against.

    Returns:
        The exercise markup as a string, or the literal message
        ``"Question index out of range."`` when *question_index* is not in
        ``1..len(exercises)``.

    Note:
        The selected element is modified in place (``src``/``href`` rewritten).
    """
    # Reject 0 and negatives too: after the 1-based -> 0-based shift they
    # would otherwise wrap around and silently select from the end.
    if not 1 <= question_index <= len(exercises):
        return "Question index out of range."
    exercise_div = exercises[question_index - 1]
    logging.debug("Processing exercise: %s", exercise_div)
    for img_tag in exercise_div.find_all('img'):
        # Prefer the lazy-load attribute; fall back to an existing src so
        # images without it do not raise KeyError.
        source = img_tag.get('data-lazy-src')
        if source is None:
            source = img_tag.get('src')
        if source is not None:
            img_tag['src'] = urljoin(img_base_url, source)
    for a_tag in exercise_div.find_all('a'):
        href = a_tag.get('href')
        # Anchors without an href (e.g. named anchors) are left untouched.
        if href is not None:
            a_tag['href'] = urljoin(href_base_url, href)
    return str(exercise_div)
def get_all_questions(book_key, unit_num, conceptual_list, problem_list):
    """Collect the requested exercises of one chapter as HTML strings.

    Args:
        book_key: Key into the module-level ``books`` mapping.
        unit_num: Chapter number; converted with ``int()`` before being
            substituted into the book's URL templates.
        conceptual_list: 1-based indices of conceptual questions to include.
        problem_list: 1-based indices of problems to include.

    Returns:
        A list of exercise markup strings, conceptual questions first and
        problems after, each produced by ``get_question``. Returns an empty
        list if a page that is actually needed cannot be downloaded.

    Raises:
        KeyError: If *book_key* is not a known book.
    """
    book = books[book_key]
    unit = int(unit_num)
    sections = (
        (book["conceptual_link"], conceptual_list),
        (book["problem_link"], problem_list),
    )
    questions = []
    for link_template, indices in sections:
        # Only download a page when something from it was requested, so a
        # failure on an unneeded page cannot discard the whole result.
        if not indices:
            continue
        page = get_html(link_template.format(unit))
        if page is None:
            return []
        exercises = page.find_all('div', {'data-type': 'exercise'})
        questions.extend(
            get_question(exercises, i, img_base_url, book["href_base_url"])
            for i in indices
        )
    return questions
def _parse_index_list(raw):
    """Convert a comma-separated string such as ``"1, 4,7"`` to ``[1, 4, 7]``.

    Blank tokens (from a trailing or doubled comma) are skipped; a token that
    is not an integer still raises ``ValueError``.
    """
    if not raw:
        return []
    return [int(token) for token in raw.split(",") if token.strip()]


def generate_html(book_key, chapter_num, conceptual_input, problem_input, path):
    """Render the selected questions of a chapter into an HTML file.

    Args:
        book_key: Key into the module-level ``books`` mapping.
        chapter_num: Chapter number; converted with ``int()``.
        conceptual_input: Comma-separated 1-based conceptual question
            numbers, or an empty value for none.
        problem_input: Comma-separated 1-based problem numbers, or an empty
            value for none.
        path: Destination file; written as UTF-8 and overwritten if present.

    Raises:
        ValueError: If a list contains a token that is not an integer.
    """
    conceptual_list = _parse_index_list(conceptual_input)
    problem_list = _parse_index_list(problem_input)
    chapter_number = int(chapter_num)
    questions = get_all_questions(book_key, chapter_number, conceptual_list, problem_list)
    template_str = '''
<!DOCTYPE html>
<html>
<head>
<title>Chapter {{ chapter_number }} Questions</title>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.13.11/dist/katex.min.css">
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.13.11/dist/katex.min.js"></script>
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.13.11/dist/contrib/auto-render.min.js"
onload="renderMathInElement(document.body);"></script>
<style>
body {
font-family: 'Calibri', sans-serif;
font-size: 20px;
}
.page-break {
page-break-after: always;
}
.mathjax {
display: inline-block;
}
</style>
</head>
<body>
<div>
<h2>Chapter: {{ chapter_number }}</h2>
{% if conceptual_problem_list %}
<h2>Conceptual Problems: {{ conceptual_problem_list|join(", ") }}</h2>
{% endif %}
{% if problems_and_exercise_list %}
<h2>Problems and Exercises: {{ problems_and_exercise_list|join(", ") }}</h2>
{% endif %}
</div>
<div class="page-break"></div>
{% for question in questions %}
<div class="mathjax">{{ question|safe }}</div>
<div class="page-break"></div>
{% endfor %}
</body>
</html>
'''
    template = Template(template_str)
    # Questions are already HTML fragments, hence the |safe filter in the
    # template; the numeric lists are rendered as plain text.
    rendered_html = template.render(chapter_number=chapter_number,
                                    conceptual_problem_list=conceptual_list,
                                    problems_and_exercise_list=problem_list,
                                    questions=questions)
    with open(path, 'w', encoding='utf-8') as f:
        f.write(rendered_html)
def main_function(book_key, unit_num, conceptual_input, problem_input):
    """Gradio callback: build the questions HTML file and return its path.

    Args:
        book_key: Title selected in the book dropdown.
        unit_num: Chapter number entered by the user.
        conceptual_input: Comma-separated conceptual question numbers.
        problem_input: Comma-separated problem numbers.

    Returns:
        The path of the generated HTML file on success. If both lists are
        empty, or any step fails, a human-readable message string is returned
        instead; failures are logged with their traceback.
    """
    # NOTE(review): the Interface wires this callback to a gr.File output, so
    # the message strings returned below are handed to a file component.
    # Raising gr.Error may be the better way to surface them - confirm
    # against the Gradio version in use before changing the return contract.
    try:
        if not conceptual_input and not problem_input:
            return "Both lists cannot be empty. Please provide at least one."
        # Validate the chapter number first so a bad value fails before any
        # previously generated file is deleted.
        file_name = "questions_" + str(int(unit_num))
        abs_path = "D:\\projects\\phy_pdf"
        # exist_ok avoids the check-then-create race between two requests.
        os.makedirs(abs_path, exist_ok=True)
        # Delete all existing HTML files in the directory
        for existing_name in os.listdir(abs_path):
            if existing_name.endswith(".html"):
                os.remove(os.path.join(abs_path, existing_name))
        html_path = os.path.join(abs_path, file_name + ".html")
        generate_html(book_key, unit_num, conceptual_input, problem_input, html_path)
        logging.info("HTML generated successfully")
        return html_path
    except Exception as e:
        # Top-level boundary for the UI: record the traceback, keep the app alive.
        logging.exception("An error occurred in main_function: %s", e)
        return "An error occurred. Please check the logs for more details."
# Input widgets, listed in the same order as main_function's parameters.
_input_components = [
    gr.Dropdown(choices=list(books), label="Select Book"),
    gr.Number(label="Chapter Number"),
    gr.Textbox(label="Conceptual Problems List (comma-separated)"),
    gr.Textbox(label="Problems & Exercises List (comma-separated)"),
]

# live=False: the callback runs only when the user submits the form.
iface = gr.Interface(
    fn=main_function,
    inputs=_input_components,
    outputs=gr.File(label="Generated HTML"),
    live=False,
)

# Start the web UI as soon as this module is executed.
iface.launch()
|