ahm14 committed on
Commit
7439c0c
·
verified ·
1 Parent(s): b0cb305

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -148
app.py CHANGED
@@ -13,12 +13,6 @@ import os
13
  from concurrent.futures import ThreadPoolExecutor
14
  import requests
15
  from bs4 import BeautifulSoup
16
- import re
17
- import json
18
- import pandas as pd
19
- import random
20
- import zipfile
21
- from fpdf import FPDF
22
 
23
  # Load environment variables
24
  load_dotenv()
@@ -34,14 +28,12 @@ pytesseract.pytesseract.tesseract_cmd = r"/usr/bin/tesseract" # Adjust based on
34
 
35
  # Function to enhance image for OCR processing
36
  def enhance_image_for_ocr(image):
37
- try:
38
- gray_image = image.convert("L")
39
- enhancer = ImageEnhance.Contrast(gray_image)
40
- enhanced_image = enhancer.enhance(2.0) # Increase contrast
41
- return enhanced_image
42
- except Exception as e:
43
- logging.error(f"Error in image enhancement: {e}")
44
- return image
45
 
46
  # Function to extract text from images using OCR
47
  def extract_text_from_images(images, lang="eng"):
@@ -51,7 +43,7 @@ def extract_text_from_images(images, lang="eng"):
51
  enhanced_image = enhance_image_for_ocr(image)
52
  ocr_text += pytesseract.image_to_string(enhanced_image, lang=lang).strip() + "\n"
53
  except Exception as e:
54
- logging.error(f"Error in OCR processing: {e}")
55
  return ocr_text.strip()
56
 
57
  # Function to extract content from PDFs
@@ -96,22 +88,18 @@ def process_files(uploaded_files, lang="eng"):
96
  images = []
97
 
98
  def process_file(file):
99
- try:
100
- file_type = file.type.split("/")[-1]
101
- if file_type == "pdf":
102
- pdf_data = extract_pdf_data(file)
103
- return pdf_data["text"], pdf_data["images"]
104
- elif file_type == "docx":
105
- return extract_docx_data(file), []
106
- elif file_type == "txt":
107
- return extract_txt_data(file), []
108
- elif file_type in ["png", "jpg", "jpeg"]:
109
- return "", [Image.open(file)]
110
- else:
111
- logging.error(f"Unsupported file type: {file_type}")
112
- return "", []
113
- except Exception as e:
114
- logging.error(f"Error processing file: {e}")
115
  return "", []
116
 
117
  with ThreadPoolExecutor() as executor:
@@ -124,106 +112,64 @@ def process_files(uploaded_files, lang="eng"):
124
  ocr_text = extract_text_from_images(images, lang)
125
  return combined_text + "\n" + ocr_text
126
 
127
- # Function to summarize syllabus content
128
- def summarize_syllabus(syllabus_text):
129
- prompt = f"Summarize the following syllabus content in a concise manner:\n{syllabus_text}"
130
- chain = (ChatPromptTemplate.from_template(prompt) | llm | StrOutputParser())
131
- try:
132
- summary = chain.invoke({})
133
- return summary.strip()
134
- except Exception as e:
135
- logging.error(f"Error summarizing syllabus: {e}")
136
- return "Could not summarize the syllabus."
137
 
138
- # Function to generate MCQ questions with customizable options
139
- def generate_mcq_question(question, options_count=4):
140
- prompt = f"Generate a multiple-choice question with {options_count} options based on this question:\n{question}"
141
  chain = (ChatPromptTemplate.from_template(prompt) | llm | StrOutputParser())
142
  try:
143
- mcq = chain.invoke({})
144
- return mcq.strip()
145
  except Exception as e:
146
- logging.error(f"Error generating MCQ: {e}")
147
- return "Failed to generate MCQ."
148
 
149
- # Function to randomize the order of questions and answers
150
- def randomize_question_order(questions, answers):
151
- question_list = questions.split("\n")
152
- answer_list = answers.split("\n")
153
-
154
- combined = list(zip(question_list, answer_list))
155
- random.shuffle(combined)
156
 
157
- randomized_questions = "\n".join([q for q, a in combined])
158
- randomized_answers = "\n".join([a for q, a in combined])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- return randomized_questions, randomized_answers
161
-
162
- # Function to review answers for clarity and conciseness
163
- def review_answers(answers):
164
- prompt = f"Review and improve the following answers for clarity and conciseness:\n{answers}"
165
- chain = (ChatPromptTemplate.from_template(prompt) | llm | StrOutputParser())
166
- try:
167
- reviewed_answers = chain.invoke({})
168
- return reviewed_answers.strip()
169
- except Exception as e:
170
- logging.error(f"Error reviewing answers: {e}")
171
- return answers
172
-
173
- # Save questions and answers as a PDF
174
- def save_as_pdf(questions, answers):
175
- try:
176
- pdf = FPDF()
177
- pdf.set_auto_page_break(auto=True, margin=15)
178
- pdf.add_page()
179
 
180
- pdf.set_font("Arial", size=12)
181
- pdf.cell(200, 10, txt="Questions and Answers", ln=True, align="C")
182
-
183
- for i, (question, answer) in enumerate(zip(questions.split("\n"), answers.split("\n"))):
184
- if question.strip():
185
- pdf.multi_cell(0, 10, f"Q{i+1}: {question}")
186
- pdf.multi_cell(0, 10, f"A{i+1}: {answer}")
187
- pdf.ln()
188
-
189
- pdf_output = BytesIO()
190
- pdf.output(pdf_output)
191
- pdf_output.seek(0)
192
- return pdf_output
193
- except Exception as e:
194
- logging.error(f"Error saving as PDF: {e}")
195
- return None
196
-
197
- # Save questions and answers as DOCX
198
- def save_as_docx(questions, answers):
199
- try:
200
- doc = docx.Document()
201
- doc.add_heading('Questions and Answers', 0)
202
-
203
- for i, question in enumerate(questions.split("\n")):
204
- if question.strip():
205
- doc.add_paragraph(f"Q{i+1}: {question}")
206
- answer_list = answers.split("\n")
207
- doc.add_paragraph(f"A{i+1}: {answer_list[i]}")
208
-
209
- doc_output = BytesIO()
210
- doc.save(doc_output)
211
- doc_output.seek(0)
212
- return doc_output
213
- except Exception as e:
214
- logging.error(f"Error saving as DOCX: {e}")
215
- return None
216
-
217
- # Function to extract files from a ZIP archive
218
- def extract_zip_file(zip_file):
219
  try:
220
- with zipfile.ZipFile(zip_file, 'r') as zip_ref:
221
- zip_ref.extractall("extracted_files")
222
- file_names = zip_ref.namelist()
223
- return file_names
224
  except Exception as e:
225
- logging.error(f"Error extracting ZIP file: {e}")
226
- return []
227
 
228
  # Streamlit UI
229
  st.title("AI-Powered Exam Generator")
@@ -236,26 +182,25 @@ with tab1:
236
  st.header("Upload Files")
237
  uploaded_files = st.file_uploader(
238
  "Upload your syllabus (PDF, DOCX, TXT, Images)",
239
- type=["pdf", "docx", "txt", "png", "jpg", "jpeg", "zip"],
240
  accept_multiple_files=True
241
  )
242
  ocr_lang = st.selectbox("Select OCR Language", ["eng", "spa", "fra", "deu", "ita"])
243
  if uploaded_files:
244
- try:
245
- syllabus_text = process_files(uploaded_files, lang=ocr_lang)
246
- st.session_state["syllabus_text"] = syllabus_text
247
- st.success("Files processed successfully!")
248
- except Exception as e:
249
- st.error(f"Error processing files: {e}")
250
 
251
- # Preview content and summarize
252
  with tab2:
253
- st.header("Preview and Summarize Syllabus Content")
254
  if "syllabus_text" in st.session_state:
255
  st.text_area("Extracted Content", st.session_state["syllabus_text"], height=300)
256
- if st.button("Summarize Syllabus"):
257
- summary = summarize_syllabus(st.session_state["syllabus_text"])
258
- st.text_area("Summary", summary, height=200)
 
 
259
 
260
  # Generate questions and answers
261
  with tab3:
@@ -280,24 +225,30 @@ with tab3:
280
  height=200
281
  )
282
  if num_questions.isdigit() and st.button("Generate Questions and Answers"):
283
- try:
284
- num_questions = int(num_questions)
285
- questions = generate_mcq_question(st.session_state["syllabus_text"], options_count=num_questions)
286
- st.session_state["questions"] = questions
287
- st.text_area("Generated Questions", questions, height=300)
288
- # Generate answers
289
- answers = review_answers(questions)
290
- st.session_state["answers"] = answers
291
- st.text_area("Generated Answers", answers, height=300)
292
- except Exception as e:
293
- logging.error(f"Error generating questions/answers: {e}")
294
- st.error("Error generating questions/answers")
 
 
 
295
 
296
  # Generate answers
297
  with tab4:
298
  st.header("Generate Answers (Optional)")
299
  if "questions" in st.session_state:
300
- if st.button("Generate Answers"):
301
- answers = review_answers(st.session_state["questions"])
302
  st.session_state["answers"] = answers
303
- st.text_area("Generated Answers", answers, height=300)
 
 
 
 
13
  from concurrent.futures import ThreadPoolExecutor
14
  import requests
15
  from bs4 import BeautifulSoup
 
 
 
 
 
 
16
 
17
  # Load environment variables
18
  load_dotenv()
 
28
 
29
  # Function to enhance image for OCR processing
30
  def enhance_image_for_ocr(image):
31
+ # Convert to grayscale for better processing
32
+ gray_image = image.convert("L")
33
+ # Increase contrast for better text clarity
34
+ enhancer = ImageEnhance.Contrast(gray_image)
35
+ enhanced_image = enhancer.enhance(2.0) # Increase contrast
36
+ return enhanced_image
 
 
37
 
38
  # Function to extract text from images using OCR
39
  def extract_text_from_images(images, lang="eng"):
 
43
  enhanced_image = enhance_image_for_ocr(image)
44
  ocr_text += pytesseract.image_to_string(enhanced_image, lang=lang).strip() + "\n"
45
  except Exception as e:
46
+ logging.error(f"Error in OCR: {e}")
47
  return ocr_text.strip()
48
 
49
  # Function to extract content from PDFs
 
88
  images = []
89
 
90
  def process_file(file):
91
+ file_type = file.type.split("/")[-1]
92
+ if file_type == "pdf":
93
+ pdf_data = extract_pdf_data(file)
94
+ return pdf_data["text"], pdf_data["images"]
95
+ elif file_type == "docx":
96
+ return extract_docx_data(file), []
97
+ elif file_type == "txt":
98
+ return extract_txt_data(file), []
99
+ elif file_type in ["png", "jpg", "jpeg"]:
100
+ return "", [Image.open(file)]
101
+ else:
102
+ logging.error(f"Unsupported file type: {file_type}")
 
 
 
 
103
  return "", []
104
 
105
  with ThreadPoolExecutor() as executor:
 
112
  ocr_text = extract_text_from_images(images, lang)
113
  return combined_text + "\n" + ocr_text
114
 
115
+ # Function to generate questions
116
def generate_questions(question_type, syllabus_text, num_questions, difficulty, prompt_template):
    """Generate exam questions from the syllabus text using the LLM.

    Args:
        question_type: Value substituted for ``{question_type}`` in the template.
        syllabus_text: Syllabus content substituted for ``{syllabus_text}``.
        num_questions: Value substituted for ``{num_questions}``.
        difficulty: Mapping of additional placeholder names to values; it is
            unpacked as keyword arguments into ``prompt_template.format``.
        prompt_template: ``str.format``-style template describing the request.

    Returns:
        The text produced by the LLM, or an empty string when the template
        cannot be rendered or the LLM call fails (the error is logged).
    """
    try:
        prompt = prompt_template.format(
            num_questions=num_questions,
            question_type=question_type,
            syllabus_text=syllabus_text,
            **difficulty,
        )
    except (KeyError, IndexError, ValueError) as e:
        # The template is caller-supplied; an unknown or malformed
        # placeholder should be reported, not crash the caller.
        logging.error(f"Error generating questions: {e}")
        return ""

    # Pass the rendered prompt as a template *variable* instead of using it
    # as the template itself: otherwise any "{" or "}" in the syllabus text
    # would be re-parsed as a placeholder and break the chain.
    chain = ChatPromptTemplate.from_template("{prompt}") | llm | StrOutputParser()
    try:
        return chain.invoke({"prompt": prompt})
    except Exception as e:
        logging.error(f"Error generating questions: {e}")
        return ""
133
 
134
+ # Refined function to generate answers
135
def generate_answers(questions, syllabus_text):
    """Answer each non-blank line of ``questions`` using the syllabus text.

    Every non-blank line is treated as one question and sent to the LLM
    together with the syllabus. If the LLM call raises, the error is logged
    and the answer is taken from ``search_answers_online`` instead.

    Args:
        questions: Newline-separated questions; blank lines are ignored.
        syllabus_text: Syllabus content the answers should be based on.

    Returns:
        One ``"Answer N: <text>"`` entry per question, joined with newlines.
        ``N`` counts answered questions consecutively from 1, so blank lines
        in ``questions`` do not leave gaps in the numbering.
    """
    # The syllabus and the question are template variables rather than being
    # interpolated into the template text: braces inside either of them would
    # otherwise be parsed as placeholders. This also lets the chain be built
    # once instead of once per question.
    answer_prompt = ChatPromptTemplate.from_template(
        "\n"
        "Below is a syllabus excerpt. Please answer the following question based on the content provided.\n"
        "Ensure the answer is directly related to the question and specific to the syllabus.\n"
        "If necessary, explain key concepts clearly. Answer the question in a concise and detailed manner.\n"
        "\n"
        "Syllabus Content: {syllabus_text}\n"
        "\n"
        "Question: {question}\n"
        "Answer:\n"
    )
    chain = answer_prompt | llm | StrOutputParser()

    pending = [line for line in questions.split("\n") if line.strip()]
    answers = []
    for number, question in enumerate(pending, start=1):
        try:
            text = chain.invoke(
                {"syllabus_text": syllabus_text, "question": question}
            ).strip()
        except Exception as e:
            # Fall back to web search if the LLM fails, but keep a trace of why.
            logging.error(f"Error generating answer {number}: {e}")
            text = search_answers_online(question)
        answers.append(f"Answer {number}: {text}")

    return "\n".join(answers)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
+ # Function to search answers online
162
def search_answers_online(question):
    """Look up ``question`` on Google and return up to three result snippets.

    Args:
        question: Free-text question used as the search query.

    Returns:
        The text of the first three ``div.BNeawe`` elements of the result
        page joined with newlines, or ``"No online answer found."`` when the
        request fails, returns an error status, or yields no such elements.
    """
    fallback = "No online answer found."
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(
            "https://www.google.com/search",
            # Let requests URL-encode the query; characters such as "&", "#"
            # or "?" in the question would otherwise corrupt the URL.
            params={"q": question},
            headers=headers,
            # Without a timeout a stalled connection would block forever.
            timeout=10,
        )
        # Do not scrape snippets out of an HTTP error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        snippets = soup.find_all("div", class_="BNeawe")
        text = "\n".join(snippet.get_text() for snippet in snippets[:3])
    except Exception as e:
        logging.error(f"Error fetching online answers: {e}")
        return fallback
    # An empty result is reported the same way as a failed lookup.
    return text or fallback
173
 
174
  # Streamlit UI
175
  st.title("AI-Powered Exam Generator")
 
182
  st.header("Upload Files")
183
  uploaded_files = st.file_uploader(
184
  "Upload your syllabus (PDF, DOCX, TXT, Images)",
185
+ type=["pdf", "docx", "txt", "png", "jpg", "jpeg"],
186
  accept_multiple_files=True
187
  )
188
  ocr_lang = st.selectbox("Select OCR Language", ["eng", "spa", "fra", "deu", "ita"])
189
  if uploaded_files:
190
+ syllabus_text = process_files(uploaded_files, lang=ocr_lang)
191
+ st.session_state["syllabus_text"] = syllabus_text
192
+ st.success("Files processed successfully!")
 
 
 
193
 
194
+ # Preview content
195
  with tab2:
196
+ st.header("Preview Syllabus Content")
197
  if "syllabus_text" in st.session_state:
198
  st.text_area("Extracted Content", st.session_state["syllabus_text"], height=300)
199
+ if st.session_state.get("images"):
200
+ for img in st.session_state["images"]:
201
+ st.image(img, caption="Uploaded Image")
202
+ else:
203
+ st.warning("No content available. Upload files first.")
204
 
205
  # Generate questions and answers
206
  with tab3:
 
225
  height=200
226
  )
227
  if num_questions.isdigit() and st.button("Generate Questions and Answers"):
228
+ num_questions = int(num_questions)
229
+
230
+ # Generate questions
231
+ questions = generate_questions(question_type, st.session_state.get("syllabus_text", ""), num_questions, difficulty, prompt_template)
232
+ st.session_state["questions"] = questions
233
+ st.text_area("Generated Questions", questions, height=300)
234
+
235
+ # Generate answers
236
+ answers = generate_answers(questions, st.session_state.get("syllabus_text", ""))
237
+ st.session_state["answers"] = answers
238
+ st.text_area("Generated Answers", answers, height=300)
239
+
240
+ # Download questions and answers
241
+ st.download_button("Download Questions", questions, file_name="questions.txt")
242
+ st.download_button("Download Answers", answers, file_name="answers.txt")
243
 
244
  # Generate answers
245
  with tab4:
246
  st.header("Generate Answers (Optional)")
247
  if "questions" in st.session_state:
248
+ if st.button("Generate Answers"):
249
+ answers = generate_answers(st.session_state["questions"], st.session_state.get("syllabus_text", ""))
250
  st.session_state["answers"] = answers
251
+ st.text_area("Generated Answers", answers, height=300)
252
+
253
+ # Download answers
254
+ st.download_button("Download Answers", answers, file_name="answers.txt")