mocktestgen committed on
Commit
9700c83
·
verified ·
1 Parent(s): 9a509c7

Rename A to app.py

Browse files
Files changed (2) hide show
  1. A +0 -0
  2. app.py +245 -0
A DELETED
File without changes
app.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pdfplumber
3
+ from PIL import Image
4
+ import pytesseract
5
+ import io
6
+ import re
7
+ import random
8
+ from transformers import pipeline
9
+
10
+ # Use stable models for question generation and summarization
11
+ qg_pipeline = pipeline("text2text-generation", model="t5-base") # stable T5 model
12
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
13
+
14
def extract_text_from_pdf(file_bytes):
    """Extract text from a PDF given as raw bytes.

    The embedded text layer of every page is read with pdfplumber. If no
    page yields any non-whitespace text, the document is passed to
    ``ocr_pdf`` as a fallback.

    Args:
        file_bytes: The PDF file content as bytes.

    Returns:
        The extracted text, one newline appended after each page that had
        text, or ``""`` if extraction failed for any reason.
    """
    import logging

    try:
        with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
            # Pages must be read while the document is still open.
            page_texts = [page.extract_text() for page in pdf.pages]
        text = "".join(f"{page_text}\n" for page_text in page_texts if page_text)
        if not text.strip():
            text = ocr_pdf(file_bytes)
        return text
    except Exception:
        # Best-effort by design: the caller treats "" as "nothing extracted".
        # Record the cause instead of discarding it so failures are debuggable.
        logging.getLogger(__name__).exception("PDF text extraction failed")
        return ""
27
+
28
def ocr_pdf(file_bytes):
    """Run OCR over every page of a PDF and return the recognised text.

    Each page is rendered to an image at resolution 300 and passed to
    pytesseract; a newline is appended after each page's text.

    Args:
        file_bytes: The PDF file content as bytes.

    Returns:
        The concatenated OCR text of all pages.
    """
    recognised = []
    with pdfplumber.open(io.BytesIO(file_bytes)) as document:
        for pdf_page in document.pages:
            rendered = pdf_page.to_image(resolution=300).original
            recognised.append(pytesseract.image_to_string(rendered) + "\n")
    return "".join(recognised)
36
+
37
def extract_text_from_image(file_bytes):
    """Run OCR on an image given as raw bytes.

    Args:
        file_bytes: The encoded image file content as bytes.

    Returns:
        The text pytesseract recognised in the image.
    """
    # Use the image as a context manager so its resources are released
    # as soon as OCR is done, even if pytesseract raises.
    with Image.open(io.BytesIO(file_bytes)) as image:
        return pytesseract.image_to_string(image)
41
+
42
def extract_text_from_txt(file_bytes):
    """Decode the bytes of a plain-text file into a string.

    UTF-8 is tried first; a leading UTF-8 byte-order mark, if present, is
    dropped rather than leaking into the text as ``\\ufeff``. Bytes that are
    not valid UTF-8 are decoded as Latin-1, which accepts any byte sequence.

    Args:
        file_bytes: The text file content as bytes.

    Returns:
        The decoded text.
    """
    try:
        # "utf-8-sig" behaves like "utf-8" but strips an optional BOM.
        return file_bytes.decode("utf-8-sig")
    except UnicodeDecodeError:
        return file_bytes.decode("latin-1")
48
+
49
def clean_text(text):
    """Normalise whitespace in extracted text.

    Runs of newlines become a single newline, runs of two or more spaces
    become a single space, and the result is stripped at both ends.

    Args:
        text: The raw extracted text.

    Returns:
        The cleaned text.
    """
    single_newlines = re.compile(r'\n+').sub('\n', text)
    single_spaces = re.compile(r'[ ]{2,}').sub(' ', single_newlines)
    return single_spaces.strip()
53
+
54
def split_to_sentences(text):
    """Split text into sentences.

    The text is split on whitespace that follows ``.``, ``?`` or ``!``;
    each piece is stripped and empty pieces are dropped.

    Args:
        text: The text to split.

    Returns:
        A list of non-empty, stripped sentence strings.
    """
    found = []
    for piece in re.split(r'(?<=[.?!])\s+', text):
        trimmed = piece.strip()
        if trimmed:
            found.append(trimmed)
    return found
57
+
58
def highlight_answer_in_context(context, answer):
    """Wrap the first occurrence of ``answer`` in ``context`` with ``<hl>`` markers.

    The search is case-insensitive. The text before and after the match is
    stripped, and the stripped ``answer`` (as passed in, not as it appears
    in ``context``) is placed between the markers.

    Args:
        context: The text to search in.
        answer: The span to highlight.

    Returns:
        ``"<before> <hl> <answer> <hl> <after>"`` when the answer is found;
        otherwise ``context`` unchanged. An empty or whitespace-only answer
        also returns ``context`` unchanged.
    """
    if not answer or not answer.strip():
        # Nothing meaningful to highlight; avoid emitting empty markers.
        return context

    # Match directly against the original string: offsets taken from
    # context.lower() can be misaligned because lowercasing may change the
    # length of some characters (e.g. "\u0130").
    match = re.search(re.escape(answer), context, flags=re.IGNORECASE)
    if match is None:
        return context

    before = context[:match.start()].strip()
    after = context[match.end():].strip()
    return f"{before} <hl> {answer.strip()} <hl> {after}"
66
+
67
def generate_mcq(answer):
    """Build four shuffled multiple-choice options that include ``answer``.

    Distractors are word-order permutations of the answer where possible,
    topped up with copies of the answer that have a punctuation mark
    appended.

    Args:
        answer: The correct answer text.

    Returns:
        A tuple ``(options, correct_letter)`` where ``options`` is a list of
        four distinct strings in random order and ``correct_letter`` is the
        letter (``'A'``-``'D'``) at the position of ``answer``.
    """
    correct_answer = answer
    words = correct_answer.split()
    options = [correct_answer]
    # Case-insensitive record of what is already offered, so a distractor
    # never duplicates the answer or another distractor.
    taken = {correct_answer.lower(), ' '.join(words).lower()}

    def _offer(candidate):
        key = candidate.lower()
        if len(options) < 4 and key not in taken:
            taken.add(key)
            options.append(candidate)

    if len(words) > 1:
        # Bounded attempts: short or repetitive answers have too few distinct
        # permutations, and an unbounded retry loop would never terminate.
        for _ in range(20):
            if len(options) >= 4:
                break
            shuffled = words[:]
            random.shuffle(shuffled)
            _offer(' '.join(shuffled))

    # The four punctuation variants are distinct from each other and from the
    # answer, so this top-up always brings the total to four options.
    suffixes = ['.', ',', '?', '!']
    random.shuffle(suffixes)
    for suffix in suffixes:
        _offer(correct_answer + suffix)

    random.shuffle(options)
    correct_letter = 'ABCD'[options.index(correct_answer)]
    return options, correct_letter
86
+
87
def generate_questions_mcq(context, num_questions):
    """Generate multiple-choice questions from the given text.

    The first 15 sentences of ``context`` are used as candidate answers.
    For each one, the sentence is highlighted inside the full context and
    sent to the question-generation pipeline. Outputs that do not end with
    ``?`` or that repeat an earlier question are skipped.

    Args:
        context: The cleaned source text.
        num_questions: Stop once this many questions have been collected.

    Returns:
        A list of dicts with keys ``question``, ``options``,
        ``correct_letter``, ``correct_answer`` and ``explanation``. If no
        usable question was produced, the list holds one placeholder entry.
    """
    collected = []
    seen = set()

    for candidate in split_to_sentences(context)[:15]:
        prompt = "generate question: " + highlight_answer_in_context(context, candidate)
        generated = qg_pipeline(prompt, max_length=64, do_sample=False)
        question = generated[0]['generated_text']

        if question.endswith('?') and question not in seen:
            seen.add(question)
            options, correct_letter = generate_mcq(candidate)
            collected.append({
                "question": question,
                "options": options,
                "correct_letter": correct_letter,
                "correct_answer": candidate,
                "explanation": f"Answer explanation: {candidate}"
            })
            if len(collected) >= num_questions:
                break

    if collected:
        return collected

    # Nothing usable came out of the model: return a single placeholder.
    return [{
        "question": "What is the main topic discussed in the content?",
        "options": ["Option A", "Option B", "Option C", "Option D"],
        "correct_letter": "A",
        "correct_answer": "Option A",
        "explanation": "Fallback explanation."
    }]
120
+
121
def generate_questions_subjective(context, num_questions):
    """Generate open-ended questions with suggested answers from the text.

    The first 20 sentences of ``context`` are used as candidates. For each
    one, the sentence is highlighted inside the full context and sent to
    the question-generation pipeline. Outputs that do not end with ``?`` or
    that repeat an earlier question are skipped. The suggested answer is
    the summarizer's output for the candidate sentence.

    Args:
        context: The cleaned source text.
        num_questions: Stop once this many questions have been collected.

    Returns:
        A list of dicts with keys ``question`` and ``answer``. If no usable
        question was produced, the list holds one placeholder entry.
    """
    collected = []
    seen = set()

    for candidate in split_to_sentences(context)[:20]:
        prompt = "generate question: " + highlight_answer_in_context(context, candidate)
        generated = qg_pipeline(prompt, max_length=64, do_sample=False)
        question = generated[0]['generated_text']

        if question.endswith('?') and question not in seen:
            seen.add(question)
            summary = summarizer(candidate, max_length=50, min_length=10, do_sample=False)
            collected.append({
                "question": question,
                "answer": summary[0]['summary_text']
            })
            if len(collected) >= num_questions:
                break

    if collected:
        return collected

    # Nothing usable came out of the model: return a single placeholder.
    return [{
        "question": "Describe the main topic discussed in the content.",
        "answer": "The main topic is an overview of the content provided."
    }]
146
+
147
def format_mcq_output(questions):
    """Render MCQ dicts as a plain-text list.

    Args:
        questions: An iterable of dicts with keys ``question``, ``options``,
            ``correct_letter`` and ``explanation``.

    Returns:
        One text block per question (question line, lettered options,
        correct answer, explanation), blocks separated by a blank line, with
        surrounding whitespace stripped.
    """
    letters = ['A', 'B', 'C', 'D']
    pieces = []
    for number, item in enumerate(questions, 1):
        pieces.append(f"- Q{number}: {item['question']}\n")
        for position, choice in enumerate(item['options']):
            pieces.append(f" - {letters[position]}. {choice}\n")
        pieces.append(f"- Correct Answer: {item['correct_letter']}\n")
        pieces.append(f"- Explanation: {item['explanation']}\n\n")
    return "".join(pieces).strip()
157
+
158
def format_subjective_output(questions):
    """Render subjective question dicts as a plain-text list.

    Args:
        questions: An iterable of dicts with keys ``question`` and ``answer``.

    Returns:
        One question line and one suggested-answer line per entry, entries
        separated by a blank line, with surrounding whitespace stripped.
    """
    pieces = []
    for number, item in enumerate(questions, 1):
        pieces.append(f"- Q{number}: {item['question']}\n")
        pieces.append(f"- Suggested Answer: {item['answer']}\n\n")
    return "".join(pieces).strip()
164
+
165
_IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".tiff")
_IMAGE_SIGNATURES = (
    b"\x89PNG\r\n\x1a\n",  # PNG
    b"\xff\xd8\xff",       # JPEG
    b"II*\x00",            # TIFF, little-endian
    b"MM\x00*",            # TIFF, big-endian
)


def _read_upload(file):
    """Return ``(content_bytes, lowercase_name)`` for an uploaded file value."""
    if isinstance(file, (bytes, bytearray)):
        # Raw bytes carry no file name, so the name is left empty.
        return bytes(file), ""
    path = file if isinstance(file, str) else getattr(file, "name", None)
    if not isinstance(path, str) or not path:
        raise TypeError("unsupported upload value")
    with open(path, "rb") as handle:
        return handle.read(), path.lower()


def _detect_upload_kind(data, name):
    """Classify an upload as ``"pdf"``, ``"image"``, ``"txt"`` or ``None``."""
    if name:
        if name.endswith(".pdf"):
            return "pdf"
        if name.endswith(_IMAGE_EXTENSIONS):
            return "image"
        if name.endswith(".txt"):
            return "txt"
        return None
    # No name available: fall back to the content's leading signature.
    if data.startswith(b"%PDF"):
        return "pdf"
    if data.startswith(_IMAGE_SIGNATURES):
        return "image"
    # BMP: "BM", a 4-byte size, then 4 reserved zero bytes. Checking the
    # reserved bytes avoids mistaking text that begins with "BM" for a bitmap.
    if data[:2] == b"BM" and data[6:10] == b"\x00\x00\x00\x00":
        return "image"
    # Heuristic: content with no NUL byte near the start is treated as text.
    if b"\x00" not in data[:1024]:
        return "txt"
    return None


def main_process(file, question_type, num_questions):
    """Extract text from an uploaded file and return formatted questions.

    Args:
        file: The upload: raw bytes, a path string, or an object with a
            ``name`` attribute holding a path. Raw bytes have no file name,
            so their type is detected from the content instead of the
            extension.
        question_type: ``"MCQ"`` for multiple-choice output; any other value
            produces subjective questions.
        num_questions: The number of questions to request.

    Returns:
        The formatted questions, or a user-facing message when no file was
        given, the file could not be read, its type is unsupported, or the
        extracted text is shorter than 30 characters.
    """
    if not file:
        return "Please upload a file."

    try:
        file_bytes, fname = _read_upload(file)
    except (OSError, TypeError):
        return "Could not read the uploaded file."

    kind = _detect_upload_kind(file_bytes, fname)
    if kind == "pdf":
        extracted_text = extract_text_from_pdf(file_bytes)
    elif kind == "image":
        extracted_text = extract_text_from_image(file_bytes)
    elif kind == "txt":
        extracted_text = extract_text_from_txt(file_bytes)
    else:
        return "Unsupported file type. Please upload PDF, Image, or TXT."

    extracted_text = clean_text(extracted_text)

    if len(extracted_text) < 30:
        return "Extracted text is too short or empty. Please check your input file."

    if question_type == "MCQ":
        questions = generate_questions_mcq(extracted_text, num_questions)
        return format_mcq_output(questions)

    questions = generate_questions_subjective(extracted_text, num_questions)
    return format_subjective_output(questions)
194
+
195
+ css = """
196
+ #header {
197
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
198
+ font-weight: 700;
199
+ font-size: 28px;
200
+ text-align: center;
201
+ margin-bottom: 20px;
202
+ color: #333;
203
+ }
204
+ #footer {
205
+ font-size: 12px;
206
+ color: #666;
207
+ margin-top: 30px;
208
+ text-align: center;
209
+ }
210
+ .output-area {
211
+ white-space: pre-wrap;
212
+ background-color: #f3f4f6;
213
+ padding: 15px;
214
+ border-radius: 8px;
215
+ font-family: monospace;
216
+ max-height: 450px;
217
+ overflow-y: auto;
218
+ }
219
+ .gr-button {
220
+ background-color: #4f46e5;
221
+ color: white;
222
+ font-weight: bold;
223
+ border-radius: 8px;
224
+ }
225
+ .gr-button:hover {
226
+ background-color: #4338ca;
227
+ }
228
+ """
229
+
230
+ with gr.Blocks(css=css) as demo:
231
+ gr.Markdown("<div id='header'>📚 Study Content Question Generator</div>")
232
+ with gr.Row():
233
+ file_input = gr.File(label="Upload PDF, Image, or Text file", type="binary") # corrected here
234
+ with gr.Column():
235
+ question_type = gr.Radio(choices=["MCQ", "Subjective"], label="Question Type", value="MCQ")
236
+ num_questions = gr.Slider(1, 10, value=5, step=1, label="Number of Questions")
237
+ generate_btn = gr.Button("Generate Questions")
238
+ output = gr.Textbox(label="Generated Questions", lines=20, interactive=False, elem_classes="output-area")
239
+
240
+ generate_btn.click(fn=main_process, inputs=[file_input, question_type, num_questions], outputs=output)
241
+
242
+ gr.Markdown("<div id='footer'>Made with ❤️ using Hugging Face Spaces and Transformers</div>")
243
+
244
+ if __name__ == "__main__":
245
+ demo.launch()