File size: 7,154 Bytes
271fcb7
 
 
 
 
 
 
37e2932
 
271fcb7
 
37e2932
 
 
cb02641
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37e2932
271fcb7
 
 
 
 
 
 
 
 
 
 
 
37e2932
271fcb7
 
d943111
271fcb7
 
 
 
54533ed
271fcb7
37e2932
54533ed
271fcb7
 
54533ed
271fcb7
 
 
 
 
cb02641
 
 
 
 
37e2932
 
 
 
 
 
271fcb7
 
 
37e2932
271fcb7
 
cb02641
271fcb7
 
cb02641
271fcb7
 
 
cb02641
271fcb7
 
 
cb02641
271fcb7
 
 
 
 
a4f4309
 
 
 
271fcb7
 
 
54533ed
271fcb7
 
 
 
a4f4309
 
 
271fcb7
 
 
 
a4f4309
271fcb7
 
 
 
 
 
 
 
 
 
a4f4309
271fcb7
 
 
 
 
 
 
 
 
 
cb02641
271fcb7
 
 
 
cb02641
37e2932
 
 
 
 
 
 
 
 
ffe110a
37e2932
ffe110a
37e2932
 
 
 
 
cb02641
37e2932
ffe110a
 
37e2932
 
 
271fcb7
 
 
 
cb02641
271fcb7
 
 
 
ffe110a
271fcb7
 
 
cb02641
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import gradio as gr
from bs4 import BeautifulSoup
import requests
from jinja2 import Template
from urllib.parse import urljoin
import os
import warnings
import logging

# Suppress all warnings process-wide.
warnings.filterwarnings(action="ignore")

# Route DEBUG-and-above log records to app.log; filemode 'w' truncates the
# file every time the module is loaded.
logging.basicConfig(
    filename='app.log',
    filemode='w',
    level=logging.DEBUG,
    format='%(name)s - %(levelname)s - %(message)s',
)

# Per-book URL table. "conceptual_link" and "problem_link" each hold one "{}"
# placeholder that is filled with the chapter number; "href_base_url" is the
# base used to resolve relative links found inside an exercise.
books = {
    title: {
        "conceptual_link": f"https://openstax.org/books/{slug}/pages/{{}}-conceptual-questions",
        "problem_link": f"https://openstax.org/books/{slug}/pages/{{}}-{problem_page}",
        "href_base_url": f"https://openstax.org/books/{slug}/pages/",
    }
    for title, slug, problem_page in (
        ("College Physics AP", "college-physics-ap-courses-2e", "problems-exercises"),
        ("University Physics Vol. 1", "university-physics-volume-1", "problems"),
        ("University Physics Vol. 2", "university-physics-volume-2", "problems"),
    )
}

# Base used to resolve the image sources found inside an exercise.
img_base_url = "https://openstax.org"

def get_html(url, timeout=30):
    """Download ``url`` and return it parsed as HTML, or ``None`` on failure.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        A ``BeautifulSoup`` tree built from the response body, or ``None``
        when the request raises a ``requests.RequestException`` (connection
        problem, timeout, or an HTTP error status). The failure is logged.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses
    except requests.RequestException as e:
        logging.error(f"An error occurred when getting HTML: {e}")
        return None

    # Force UTF-8 decoding of the body instead of relying on the encoding
    # guessed from the response headers.
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'html.parser')

def get_question(exercises, question_index, img_base_url, href_base_url):
    """Return one exercise as HTML with its image and link URLs made absolute.

    Args:
        exercises: Sequence of parsed exercise elements; each must support
            ``find_all`` and attribute access by item.
        question_index: 1-based number of the exercise to return.
        img_base_url: Base against which image sources are resolved.
        href_base_url: Base against which link targets are resolved.

    Returns:
        The exercise markup as a string, or the message
        ``"Question index out of range."`` when ``question_index`` is smaller
        than 1 or larger than ``len(exercises)``.

    Note:
        The selected element is modified in place (its ``src`` / ``href``
        attributes are rewritten).
    """
    position = question_index - 1

    # Reject numbers below 1 as well: a negative position would otherwise
    # wrap around and silently return an exercise counted from the end.
    if position < 0 or position >= len(exercises):
        return "Question index out of range."

    exercise_div = exercises[position]
    logging.debug(f"Processing exercise: {exercise_div}")

    for img_tag in exercise_div.find_all('img'):
        # Prefer the lazy-load attribute, but fall back to an existing src so
        # an image without it does not raise KeyError.
        img_src = img_tag.get('data-lazy-src') or img_tag.get('src')
        if img_src:
            img_tag['src'] = urljoin(img_base_url, img_src)

    for a_tag in exercise_div.find_all('a'):
        # Anchors without an href (e.g. named anchors) are left untouched.
        href = a_tag.get('href')
        if href is not None:
            a_tag['href'] = urljoin(href_base_url, href)

    return str(exercise_div)

def get_all_questions(book_key, unit_num, conceptual_list, problem_list):
    """Collect the requested exercises of one chapter as HTML strings.

    Args:
        book_key: Key into the module-level ``books`` table.
        unit_num: Chapter number; converted with ``int`` before being
            substituted into the page URLs.
        conceptual_list: 1-based numbers of the conceptual questions wanted.
        problem_list: 1-based numbers of the problems wanted.

    Returns:
        A list with the conceptual questions first and the problems after
        them, in the order requested. An empty list is returned when a page
        that is actually needed cannot be downloaded.

    Raises:
        KeyError: If ``book_key`` is not present in ``books``.
    """
    book = books[book_key]
    chapter = int(unit_num)

    questions = []
    for link_key, wanted in (
        ("conceptual_link", conceptual_list),
        ("problem_link", problem_list),
    ):
        if not wanted:
            # Nothing was requested from this page, so skip the download: a
            # failure there must not discard the other page's questions.
            continue

        page = get_html(book[link_key].format(chapter))
        if page is None:
            return []

        exercises = page.find_all('div', {'data-type': 'exercise'})
        questions.extend(
            get_question(exercises, number, img_base_url, book["href_base_url"])
            for number in wanted
        )

    return questions

def _parse_question_numbers(raw):
    """Convert a comma-separated string such as "1, 4,7" into a list of ints."""
    if not raw:
        return []
    # Skip blank entries so a trailing or doubled comma ("1,2," / "1,,2")
    # does not raise ValueError; non-numeric entries still do.
    return [int(piece) for piece in raw.split(",") if piece.strip()]


def generate_html(book_key, chapter_num, conceptual_input, problem_input, path):
    """Render the selected questions of a chapter into an HTML file.

    Args:
        book_key: Key into the module-level ``books`` table.
        chapter_num: Chapter number; converted with ``int``.
        conceptual_input: Comma-separated conceptual question numbers, or an
            empty value for none.
        problem_input: Comma-separated problem numbers, or an empty value for
            none.
        path: Destination file; it is written as UTF-8 and overwritten if it
            already exists.

    Raises:
        ValueError: If an entry of either list is not an integer.
    """
    conceptual_list = _parse_question_numbers(conceptual_input)
    problem_list = _parse_question_numbers(problem_input)

    questions = get_all_questions(book_key, int(chapter_num), conceptual_list, problem_list)
    if not questions:
        # The page is still written (header only); leave a trace explaining
        # why it contains no exercises.
        logging.warning("No questions were retrieved; the generated page will only contain the header")

    template_str = '''
    <!DOCTYPE html>
    <html>
    <head>
        <title>Chapter {{ chapter_number }} Questions</title>
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.13.11/dist/katex.min.css">
        <script defer src="https://cdn.jsdelivr.net/npm/katex@0.13.11/dist/katex.min.js"></script>
        <script defer src="https://cdn.jsdelivr.net/npm/katex@0.13.11/dist/contrib/auto-render.min.js"
                onload="renderMathInElement(document.body);"></script>
        <style>
            body {
                font-family: 'Calibri', sans-serif;
                font-size: 20px;
            }
            .page-break {
                page-break-after: always;
            }
            .mathjax {
                display: inline-block;
            }
        </style>
    </head>
    <body>
        <div>
            <h2>Chapter: {{ chapter_number }}</h2>
            {% if conceptual_problem_list %}
                <h2>Conceptual Problems: {{ conceptual_problem_list|join(", ") }}</h2>
            {% endif %}
            {% if problems_and_exercise_list %}
                <h2>Problems and Exercises: {{ problems_and_exercise_list|join(", ") }}</h2>
            {% endif %}
        </div>
        <div class="page-break"></div>
        
        {% for question in questions %}
            <div class="mathjax">{{ question|safe }}</div>
            <div class="page-break"></div>
        {% endfor %}
    </body>
    </html>
    '''
    template = Template(template_str)

    rendered_html = template.render(chapter_number=int(chapter_num),
                                    conceptual_problem_list=conceptual_list,
                                    problems_and_exercise_list=problem_list,
                                    questions=questions)

    with open(path, 'w', encoding='utf-8') as f:
        f.write(rendered_html)

def main_function(book_key, unit_num, conceptual_input, problem_input,
                  output_dir="D:\\projects\\phy_pdf"):
    """Generate the questions page for one chapter and return its file path.

    Args:
        book_key: Key into the module-level ``books`` table.
        unit_num: Chapter number; converted with ``int``.
        conceptual_input: Comma-separated conceptual question numbers.
        problem_input: Comma-separated problem numbers.
        output_dir: Directory that receives the generated file. It is created
            when missing, and every ``.html`` file already in it is deleted
            before the new one is written.

    Returns:
        The path of the generated HTML file on success. When both lists are
        empty, or when any exception occurs, a human-readable message string
        is returned instead.
    """
    # NOTE(review): the interface declares a file output, so confirm how these
    # message strings are presented to the user.
    try:
        # Treat None and whitespace-only text like an empty field.
        if not (conceptual_input or "").strip() and not (problem_input or "").strip():
            return "Both lists cannot be empty. Please provide at least one."

        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(output_dir, exist_ok=True)

        # Delete all existing HTML files in the directory
        for file_name in os.listdir(output_dir):
            if file_name.endswith(".html"):
                os.remove(os.path.join(output_dir, file_name))

        html_path = os.path.join(output_dir, "questions_" + str(int(unit_num)) + ".html")

        generate_html(book_key, unit_num, conceptual_input, problem_input, html_path)

        logging.info("HTML generated successfully")
        return html_path
    except Exception as e:
        # logging.exception records the traceback as well as the message.
        logging.exception(f"An error occurred in main_function: {e}")
        return "An error occurred. Please check the logs for more details."

# Web form: a book selector, the chapter number and the two comma-separated
# question lists are passed to main_function; its result feeds a file output.
iface = gr.Interface(
    live=False,
    outputs=gr.File(label="Generated HTML"),
    inputs=[
        gr.Dropdown(choices=list(books), label="Select Book"),
        gr.Number(label="Chapter Number"),
        gr.Textbox(label="Conceptual Problems List (comma-separated)"),
        gr.Textbox(label="Problems & Exercises List (comma-separated)"),
    ],
    fn=main_function,
)

# Start the Gradio server as soon as the module is executed.
iface.launch()