Spaces:

ahm14
/

Advanced_Exam_Generator

Build error

App Files Files Community

ahm14 committed on Jan 13, 2025

Commit

7439c0c

verified ·

1 Parent(s): b0cb305

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -148

app.py CHANGED Viewed

@@ -13,12 +13,6 @@ import os
 from concurrent.futures import ThreadPoolExecutor
 import requests
 from bs4 import BeautifulSoup
-import re
-import json
-import pandas as pd
-import random
-import zipfile
-from fpdf import FPDF
 # Load environment variables
 load_dotenv()
@@ -34,14 +28,12 @@ pytesseract.pytesseract.tesseract_cmd = r"/usr/bin/tesseract"  # Adjust based on
 # Function to enhance image for OCR processing
 def enhance_image_for_ocr(image):
-    try:
-        gray_image = image.convert("L")
-        enhancer = ImageEnhance.Contrast(gray_image)
-        enhanced_image = enhancer.enhance(2.0)  # Increase contrast
-        return enhanced_image
-    except Exception as e:
-        logging.error(f"Error in image enhancement: {e}")
-        return image
 # Function to extract text from images using OCR
 def extract_text_from_images(images, lang="eng"):
@@ -51,7 +43,7 @@ def extract_text_from_images(images, lang="eng"):
             enhanced_image = enhance_image_for_ocr(image)
             ocr_text += pytesseract.image_to_string(enhanced_image, lang=lang).strip() + "\n"
         except Exception as e:
-            logging.error(f"Error in OCR processing: {e}")
     return ocr_text.strip()
 # Function to extract content from PDFs
@@ -96,22 +88,18 @@ def process_files(uploaded_files, lang="eng"):
     images = []
     def process_file(file):
-        try:
-            file_type = file.type.split("/")[-1]
-            if file_type == "pdf":
-                pdf_data = extract_pdf_data(file)
-                return pdf_data["text"], pdf_data["images"]
-            elif file_type == "docx":
-                return extract_docx_data(file), []
-            elif file_type == "txt":
-                return extract_txt_data(file), []
-            elif file_type in ["png", "jpg", "jpeg"]:
-                return "", [Image.open(file)]
-            else:
-                logging.error(f"Unsupported file type: {file_type}")
-                return "", []
-        except Exception as e:
-            logging.error(f"Error processing file: {e}")
             return "", []
     with ThreadPoolExecutor() as executor:
@@ -124,106 +112,64 @@ def process_files(uploaded_files, lang="eng"):
     ocr_text = extract_text_from_images(images, lang)
     return combined_text + "\n" + ocr_text
-# Function to summarize syllabus content
-def summarize_syllabus(syllabus_text):
-    prompt = f"Summarize the following syllabus content in a concise manner:\n{syllabus_text}"
-    chain = (ChatPromptTemplate.from_template(prompt) | llm | StrOutputParser())
-    try:
-        summary = chain.invoke({})
-        return summary.strip()
-    except Exception as e:
-        logging.error(f"Error summarizing syllabus: {e}")
-        return "Could not summarize the syllabus."
-# Function to generate MCQ questions with customizable options
-def generate_mcq_question(question, options_count=4):
-    prompt = f"Generate a multiple-choice question with {options_count} options based on this question:\n{question}"
     chain = (ChatPromptTemplate.from_template(prompt) | llm | StrOutputParser())
     try:
-        mcq = chain.invoke({})
-        return mcq.strip()
     except Exception as e:
-        logging.error(f"Error generating MCQ: {e}")
-        return "Failed to generate MCQ."
-# Function to randomize the order of questions and answers
-def randomize_question_order(questions, answers):
-    question_list = questions.split("\n")
-    answer_list = answers.split("\n")
-    combined = list(zip(question_list, answer_list))
-    random.shuffle(combined)
-    randomized_questions = "\n".join([q for q, a in combined])
-    randomized_answers = "\n".join([a for q, a in combined])
-    return randomized_questions, randomized_answers
-# Function to review answers for clarity and conciseness
-def review_answers(answers):
-    prompt = f"Review and improve the following answers for clarity and conciseness:\n{answers}"
-    chain = (ChatPromptTemplate.from_template(prompt) | llm | StrOutputParser())
-    try:
-        reviewed_answers = chain.invoke({})
-        return reviewed_answers.strip()
-    except Exception as e:
-        logging.error(f"Error reviewing answers: {e}")
-        return answers
-# Save questions and answers as a PDF
-def save_as_pdf(questions, answers):
-    try:
-        pdf = FPDF()
-        pdf.set_auto_page_break(auto=True, margin=15)
-        pdf.add_page()
-        pdf.set_font("Arial", size=12)
-        pdf.cell(200, 10, txt="Questions and Answers", ln=True, align="C")
-        for i, (question, answer) in enumerate(zip(questions.split("\n"), answers.split("\n"))):
-            if question.strip():
-                pdf.multi_cell(0, 10, f"Q{i+1}: {question}")
-                pdf.multi_cell(0, 10, f"A{i+1}: {answer}")
-                pdf.ln()
-        pdf_output = BytesIO()
-        pdf.output(pdf_output)
-        pdf_output.seek(0)
-        return pdf_output
-    except Exception as e:
-        logging.error(f"Error saving as PDF: {e}")
-        return None
-# Save questions and answers as DOCX
-def save_as_docx(questions, answers):
-    try:
-        doc = docx.Document()
-        doc.add_heading('Questions and Answers', 0)
-        for i, question in enumerate(questions.split("\n")):
-            if question.strip():
-                doc.add_paragraph(f"Q{i+1}: {question}")
-                answer_list = answers.split("\n")
-                doc.add_paragraph(f"A{i+1}: {answer_list[i]}")
-        doc_output = BytesIO()
-        doc.save(doc_output)
-        doc_output.seek(0)
-        return doc_output
-    except Exception as e:
-        logging.error(f"Error saving as DOCX: {e}")
-        return None
-# Function to extract files from a ZIP archive
-def extract_zip_file(zip_file):
     try:
-        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
-            zip_ref.extractall("extracted_files")
-            file_names = zip_ref.namelist()
-        return file_names
     except Exception as e:
-        logging.error(f"Error extracting ZIP file: {e}")
-        return []
 # Streamlit UI
 st.title("AI-Powered Exam Generator")
@@ -236,26 +182,25 @@ with tab1:
     st.header("Upload Files")
     uploaded_files = st.file_uploader(
         "Upload your syllabus (PDF, DOCX, TXT, Images)",
-        type=["pdf", "docx", "txt", "png", "jpg", "jpeg", "zip"],
         accept_multiple_files=True
     )
     ocr_lang = st.selectbox("Select OCR Language", ["eng", "spa", "fra", "deu", "ita"])
     if uploaded_files:
-        try:
-            syllabus_text = process_files(uploaded_files, lang=ocr_lang)
-            st.session_state["syllabus_text"] = syllabus_text
-            st.success("Files processed successfully!")
-        except Exception as e:
-            st.error(f"Error processing files: {e}")
-# Preview content and summarize
 with tab2:
-    st.header("Preview and Summarize Syllabus Content")
     if "syllabus_text" in st.session_state:
         st.text_area("Extracted Content", st.session_state["syllabus_text"], height=300)
-        if st.button("Summarize Syllabus"):
-            summary = summarize_syllabus(st.session_state["syllabus_text"])
-            st.text_area("Summary", summary, height=200)
 # Generate questions and answers
 with tab3:
@@ -280,24 +225,30 @@ with tab3:
         height=200
     )
     if num_questions.isdigit() and st.button("Generate Questions and Answers"):
-        try:
-            num_questions = int(num_questions)
-            questions = generate_mcq_question(st.session_state["syllabus_text"], options_count=num_questions)
-            st.session_state["questions"] = questions
-            st.text_area("Generated Questions", questions, height=300)
-            # Generate answers
-            answers = review_answers(questions)
-            st.session_state["answers"] = answers
-            st.text_area("Generated Answers", answers, height=300)
-        except Exception as e:
-            logging.error(f"Error generating questions/answers: {e}")
-            st.error("Error generating questions/answers")
 # Generate answers
 with tab4:
     st.header("Generate Answers (Optional)")
     if "questions" in st.session_state:
-        if st.button("Generate Answers"):
-            answers = review_answers(st.session_state["questions"])
             st.session_state["answers"] = answers
-            st.text_area("Generated Answers", answers, height=300)

 from concurrent.futures import ThreadPoolExecutor
 import requests
 from bs4 import BeautifulSoup
 # Load environment variables
 load_dotenv()
 # Function to enhance image for OCR processing
 def enhance_image_for_ocr(image):
     """Return a grayscale, contrast-boosted copy of *image* for OCR.

     The image is converted to mode "L" and then passed through
     ``ImageEnhance.Contrast`` with a factor of 2.0.
     """
     return ImageEnhance.Contrast(image.convert("L")).enhance(2.0)
 # Function to extract text from images using OCR
 def extract_text_from_images(images, lang="eng"):
             enhanced_image = enhance_image_for_ocr(image)
             ocr_text += pytesseract.image_to_string(enhanced_image, lang=lang).strip() + "\n"
         except Exception as e:
+            logging.error(f"Error in OCR: {e}")
     return ocr_text.strip()
 # Function to extract content from PDFs
     images = []
     def process_file(file):
         """Extract text and images from one uploaded file.

         Returns a ``(text, images)`` tuple; unsupported types are logged
         and yield ``("", [])``.
         """
         # The MIME subtype is not a reliable file kind: .txt uploads are
         # reported as "text/plain" and .docx as a long
         # "vnd.openxmlformats-..." subtype, so neither would ever match the
         # "txt"/"docx" branches. Prefer the filename extension and fall back
         # to the MIME subtype only when the name has no extension.
         name = getattr(file, "name", "") or ""
         if "." in name:
             file_type = name.rsplit(".", 1)[-1].lower()
         else:
             file_type = file.type.split("/")[-1]
         if file_type == "pdf":
             pdf_data = extract_pdf_data(file)
             return pdf_data["text"], pdf_data["images"]
         elif file_type == "docx":
             return extract_docx_data(file), []
         elif file_type == "txt":
             return extract_txt_data(file), []
         elif file_type in ["png", "jpg", "jpeg"]:
             return "", [Image.open(file)]
         else:
             logging.error(f"Unsupported file type: {file_type}")
             return "", []
     with ThreadPoolExecutor() as executor:
     ocr_text = extract_text_from_images(images, lang)
     return combined_text + "\n" + ocr_text
+# Function to generate questions
def generate_questions(question_type, syllabus_text, num_questions, difficulty, prompt_template):
    """Generate exam questions by rendering *prompt_template* and sending it to the LLM.

    Args:
        question_type: Value substituted for ``{question_type}``.
        syllabus_text: Value substituted for ``{syllabus_text}``.
        num_questions: Value substituted for ``{num_questions}``.
        difficulty: Mapping unpacked as extra keyword placeholders for the
            template.
        prompt_template: ``str.format``-style template text.

    Returns:
        The LLM output, or ``""`` if rendering the template or calling the
        model fails (the error is logged).
    """
    try:
        # Rendering sits inside the try block: a template with an unknown
        # placeholder should yield the documented "" result, not a crash.
        prompt = prompt_template.format(
            num_questions=num_questions,
            question_type=question_type,
            syllabus_text=syllabus_text,
            **difficulty,
        )
        # The prompt is now final text. Escape braces so that any "{" or "}"
        # coming from the syllabus is not re-parsed as a template variable.
        literal_prompt = prompt.replace("{", "{{").replace("}", "}}")
        chain = (ChatPromptTemplate.from_template(literal_prompt) | llm | StrOutputParser())
        return chain.invoke({})
    except Exception as e:
        logging.error(f"Error generating questions: {e}")
        return ""
+# Refined function to generate answers
def generate_answers(questions, syllabus_text):
    """Answer each non-blank line of *questions* using the syllabus as context.

    Args:
        questions: Newline-separated questions; blank lines are ignored.
        syllabus_text: Syllabus content inserted into the prompt.

    Returns:
        One ``"Answer N: ..."`` entry per question, joined with newlines and
        numbered consecutively from 1. Returns ``""`` when there are no
        questions. If the LLM call fails for a question, the error is logged
        and ``search_answers_online`` supplies that answer instead.
    """
    pending = [line for line in questions.split("\n") if line.strip()]
    if not pending:
        return ""

    # The syllabus and question are passed as template variables rather than
    # interpolated into the template text, so braces inside them are never
    # parsed as placeholders. The chain is built once and reused.
    template = """
            Below is a syllabus excerpt. Please answer the following question based on the content provided.
            Ensure the answer is directly related to the question and specific to the syllabus.
            If necessary, explain key concepts clearly. Answer the question in a concise and detailed manner.
            Syllabus Content: {syllabus_text}
            Question: {question}
            Answer:
            """
    chain = (ChatPromptTemplate.from_template(template) | llm | StrOutputParser())

    answers = []
    for number, question in enumerate(pending, start=1):
        try:
            answer = chain.invoke(
                {"syllabus_text": syllabus_text, "question": question}
            ).strip()
        except Exception as e:
            # Fall back to web search if the LLM fails, but leave a trace.
            logging.error(f"Error generating answer {number}: {e}")
            answer = search_answers_online(question)
        answers.append(f"Answer {number}: {answer}")
    return "\n".join(answers)
+# Function to search answers online
def search_answers_online(question):
    """Look up *question* with a Google search and return the first result snippets.

    Args:
        question: Free-text query.

    Returns:
        Up to three snippet texts joined with newlines, or
        ``"No online answer found."`` when the question is empty, the
        request fails, or no snippets are present in the response.
    """
    fallback = "No online answer found."
    query = question.strip() if question else ""
    if not query:
        # Nothing to search for; skip the network round-trip.
        return fallback

    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # `params` URL-encodes the query, so spaces, "&", "#" and similar
        # characters in the question cannot corrupt the request.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": query},
            headers=headers,
            timeout=10,  # do not let a stalled request hang the app
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # NOTE(review): "BNeawe" is a class name from Google's result markup
        # and may change without notice - confirm it still matches.
        snippets = soup.find_all("div", class_="BNeawe")
        text = "\n".join(snippet.get_text() for snippet in snippets[:3])
        return text or fallback
    except Exception as e:
        logging.error(f"Error fetching online answers: {e}")
        return fallback
 # Streamlit UI
 st.title("AI-Powered Exam Generator")
     st.header("Upload Files")
     uploaded_files = st.file_uploader(
         "Upload your syllabus (PDF, DOCX, TXT, Images)",
+        type=["pdf", "docx", "txt", "png", "jpg", "jpeg"],
         accept_multiple_files=True
     )
     ocr_lang = st.selectbox("Select OCR Language", ["eng", "spa", "fra", "deu", "ita"])
     if uploaded_files:
+        syllabus_text = process_files(uploaded_files, lang=ocr_lang)
+        st.session_state["syllabus_text"] = syllabus_text
+        st.success("Files processed successfully!")
+# Preview content
 with tab2:
+    st.header("Preview Syllabus Content")
     if "syllabus_text" in st.session_state:
         st.text_area("Extracted Content", st.session_state["syllabus_text"], height=300)
+        if st.session_state.get("images"):
+            for img in st.session_state["images"]:
+                st.image(img, caption="Uploaded Image")
+    else:
+        st.warning("No content available. Upload files first.")
 # Generate questions and answers
 with tab3:
         height=200
     )
     if num_questions.isdigit() and st.button("Generate Questions and Answers"):
+        num_questions = int(num_questions)
+        # Generate questions
+        questions = generate_questions(question_type, st.session_state.get("syllabus_text", ""), num_questions, difficulty, prompt_template)
+        st.session_state["questions"] = questions
+        st.text_area("Generated Questions", questions, height=300)
+        # Generate answers
+        answers = generate_answers(questions, st.session_state.get("syllabus_text", ""))
+        st.session_state["answers"] = answers
+        st.text_area("Generated Answers", answers, height=300)
+        # Download questions and answers
+        st.download_button("Download Questions", questions, file_name="questions.txt")
+        st.download_button("Download Answers", answers, file_name="answers.txt")
 # Generate answers
 with tab4:
     st.header("Generate Answers (Optional)")
     if "questions" in st.session_state:
+        if st.button("Generate Answers"):
+            answers = generate_answers(st.session_state["questions"], st.session_state.get("syllabus_text", ""))
             st.session_state["answers"] = answers
+            st.text_area("Generated Answers", answers, height=300)
+            # Download answers
+            st.download_button("Download Answers", answers, file_name="answers.txt")