Spaces:
Build error
Build error
| import streamlit as st | |
| from langchain_groq import ChatGroq | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from dotenv import load_dotenv | |
| import pytesseract | |
| from PIL import Image, ImageEnhance | |
| import pdfplumber | |
| import docx | |
| from io import BytesIO | |
| import logging | |
| import os | |
| from concurrent.futures import ThreadPoolExecutor | |
| import requests | |
| from bs4 import BeautifulSoup | |
# Load environment variables (e.g. GROQ_API_KEY) from a local .env file, if present.
load_dotenv()

# Initialize logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Initialize LLM.
# SECURITY: the API key is read from the GROQ_API_KEY environment variable (or the
# .env file loaded above) instead of being hard-coded. The key that used to be
# embedded on this line was published with the source and should be revoked.
_groq_api_key = os.getenv("GROQ_API_KEY")
if not _groq_api_key:
    logging.warning("GROQ_API_KEY is not set; requests to the LLM will fail.")
llm = ChatGroq(
    temperature=0.5,
    groq_api_key=_groq_api_key,
    model_name="llama3-8b-8192",
)

# OCR configuration: the Tesseract binary location can be overridden with the
# TESSERACT_CMD environment variable; the previous hard-coded path is the default.
pytesseract.pytesseract.tesseract_cmd = os.getenv("TESSERACT_CMD", r"/usr/bin/tesseract")
# Function to enhance image for OCR processing
def enhance_image_for_ocr(image):
    """Return a grayscale copy of *image* with its contrast doubled.

    The image is converted to mode "L" and then passed through
    ``ImageEnhance.Contrast`` with a factor of 2.0.
    """
    grayscale = image.convert("L")
    return ImageEnhance.Contrast(grayscale).enhance(2.0)
# Function to extract text from images using OCR
def extract_text_from_images(images, lang="eng"):
    """Run OCR over every image and return the recognised text.

    Each image is enhanced with ``enhance_image_for_ocr`` and passed to
    ``pytesseract.image_to_string`` using *lang*. Per-image results are
    stripped and joined with newlines. An image that fails is logged and
    skipped, so one bad image does not discard the others.
    """
    recognised = []
    for picture in images:
        try:
            prepared = enhance_image_for_ocr(picture)
            recognised.append(pytesseract.image_to_string(prepared, lang=lang).strip())
        except Exception as e:
            logging.error(f"Error in OCR: {e}")
    return "\n".join(recognised).strip()
def _pdf_image_to_pil(page, img):
    """Convert one entry of ``page.images`` into a PIL image.

    The embedded stream is tried first, which works when the PDF stores the
    picture in a format PIL can read directly (for example JPEG). If that
    fails, the image's bounding box is rendered from the page instead.
    """
    try:
        image = Image.open(BytesIO(img["stream"].get_data()))
        # Force decoding now so a broken stream fails here, not later during OCR.
        image.load()
        return image
    except Exception:
        bbox = (img["x0"], img["top"], img["x1"], img["bottom"])
        return page.crop(bbox).to_image(resolution=200).original


# Function to extract content from PDFs
def extract_pdf_data(pdf_file):
    """Extract text and embedded images from a PDF.

    Args:
        pdf_file: A path or binary file-like object accepted by
            ``pdfplumber.open``.

    Returns:
        A dict with two keys: ``"text"`` holds the text of all pages joined
        with newlines, and ``"images"`` holds a list of PIL images found on
        the pages. Whatever was extracted before a failure is still returned.

    Errors are logged rather than raised. Image failures are handled per
    image, so a picture that cannot be decoded no longer aborts text
    extraction for the remaining pages.
    """
    data = {"text": "", "images": []}
    page_texts = []
    try:
        with pdfplumber.open(pdf_file) as pdf:
            for page in pdf.pages:
                page_texts.append(page.extract_text() or "")
                for img in page.images:
                    try:
                        data["images"].append(_pdf_image_to_pil(page, img))
                    except Exception as e:
                        logging.error(
                            f"Error extracting image on PDF page {page.page_number}: {e}"
                        )
    except Exception as e:
        logging.error(f"Error processing PDF: {e}")
    # Join with newlines so the last word of one page is not glued to the
    # first word of the next.
    data["text"] = "\n".join(text for text in page_texts if text)
    return data
# Function to extract content from DOCX files
def extract_docx_data(docx_file):
    """Return the non-empty paragraphs of a DOCX file joined by newlines.

    Every paragraph's text is stripped and blank paragraphs are dropped.
    Any failure is logged and an empty string is returned.
    """
    try:
        stripped = (paragraph.text.strip() for paragraph in docx.Document(docx_file).paragraphs)
        return "\n".join(line for line in stripped if line)
    except Exception as e:
        logging.error(f"Error processing DOCX: {e}")
        return ""
# Function to extract plain text from TXT files
def extract_txt_data(txt_file):
    """Read a binary text file and return its stripped contents.

    Args:
        txt_file: A binary file-like object whose ``read()`` returns bytes.

    Returns:
        The decoded text with surrounding whitespace removed, or ``""`` when
        the file cannot be read.

    The bytes are decoded as UTF-8 and a leading byte-order mark is dropped.
    If the content is not valid UTF-8, undecodable bytes are replaced with
    U+FFFD instead of discarding the whole file.
    """
    try:
        raw = txt_file.read()
    except Exception as e:
        logging.error(f"Error processing TXT: {e}")
        return ""
    try:
        # "utf-8-sig" removes a BOM, which str.strip() would leave in place.
        text = raw.decode("utf-8-sig")
    except UnicodeDecodeError as e:
        logging.warning(f"TXT file is not valid UTF-8, replacing invalid bytes: {e}")
        text = raw.decode("utf-8", errors="replace")
    except Exception as e:
        logging.error(f"Error processing TXT: {e}")
        return ""
    return text.strip()
# Process uploaded files in parallel and extract text and images
def process_files(uploaded_files, lang="eng"):
    """Extract text from uploaded files, running OCR on any images found.

    Args:
        uploaded_files: Iterable of uploaded file objects exposing ``name``
            and/or ``type`` (as Streamlit's uploaded files do). ``None`` or
            an empty list is allowed.
        lang: Tesseract language code used for OCR.

    Returns:
        The text of all files joined with newlines, followed by a newline
        and the OCR text of all collected images.

    The file kind is taken from the file name's extension. The MIME subtype
    is only a fallback, because it is "plain" for .txt files and
    "vnd.openxmlformats-officedocument.wordprocessingml.document" for .docx
    files and so never matched the "txt"/"docx" branches.
    """
    image_extensions = {"png", "jpg", "jpeg"}

    def detect_file_type(file):
        # Prefer the extension; fall back to the MIME subtype when no name exists.
        name = getattr(file, "name", "") or ""
        extension = os.path.splitext(name)[1].lower().lstrip(".")
        if extension:
            return extension
        mime_type = getattr(file, "type", "") or ""
        return mime_type.split("/")[-1].lower()

    def process_file(file):
        file_type = detect_file_type(file)
        if file_type == "pdf":
            pdf_data = extract_pdf_data(file)
            return pdf_data["text"], pdf_data["images"]
        if file_type == "docx":
            return extract_docx_data(file), []
        if file_type == "txt":
            return extract_txt_data(file), []
        if file_type in image_extensions:
            try:
                return "", [Image.open(file)]
            except Exception as e:
                # An unreadable image must not abort the whole batch.
                logging.error(f"Error opening image: {e}")
                return "", []
        logging.error(f"Unsupported file type: {file_type}")
        return "", []

    with ThreadPoolExecutor() as executor:
        results = list(executor.map(process_file, uploaded_files or []))

    texts = []
    images = []
    for text, img_list in results:
        if text:
            texts.append(text)
        images.extend(img_list)

    ocr_text = extract_text_from_images(images, lang)
    return "\n".join(texts) + "\n" + ocr_text
# Function to generate questions
def generate_questions(question_type, syllabus_text, num_questions, difficulty, prompt_template):
    """Fill the prompt template and ask the LLM to generate questions.

    Args:
        question_type: Kind of questions to request (e.g. "MCQs").
        syllabus_text: Source material the questions are based on.
        num_questions: Number of questions to request.
        difficulty: Mapping of difficulty-level name to weight; its keys are
            available as placeholders in the template.
        prompt_template: ``str.format`` template using the placeholders
            ``{num_questions}``, ``{question_type}``, ``{syllabus_text}`` and
            the difficulty keys.

    Returns:
        The model's response text, or ``""`` if the template is invalid or
        the LLM call fails (the error is logged).
    """
    # The template is user-editable, so a bad placeholder must not crash the app.
    try:
        prompt = prompt_template.format(
            num_questions=num_questions,
            question_type=question_type,
            syllabus_text=syllabus_text,
            **difficulty,
        )
    except (KeyError, IndexError, ValueError, AttributeError, TypeError) as e:
        logging.error(f"Invalid prompt template: {e}")
        return ""

    # Send the finished prompt as plain text. Feeding it through
    # ChatPromptTemplate.from_template again would treat any "{" or "}" in
    # the syllabus as template variables and fail.
    chain = llm | StrOutputParser()
    try:
        return chain.invoke(prompt)
    except Exception as e:
        logging.error(f"Error generating questions: {e}")
        return ""
# Refined function to generate answers
def generate_answers(questions, syllabus_text):
    """Answer every non-blank line of *questions* using the syllabus.

    Args:
        questions: Newline-separated questions; blank lines are ignored.
        syllabus_text: Syllabus content supplied to the model as context.

    Returns:
        One ``"Answer N: ..."`` entry per question, joined by newlines and
        numbered consecutively from 1. If the LLM call fails for a question,
        the failure is logged and the result of ``search_answers_online`` is
        used for that entry instead.
    """
    # Build the chain once. The prompt is passed as plain text so braces in
    # the syllabus or the question are not parsed as template variables.
    chain = llm | StrOutputParser()
    answers = []
    for question in (questions or "").splitlines():
        question = question.strip()
        if not question:
            continue
        prompt = (
            "Below is a syllabus excerpt. Please answer the following question based on the content provided.\n"
            "Ensure the answer is directly related to the question and specific to the syllabus.\n"
            "If necessary, explain key concepts clearly. Answer the question in a concise and detailed manner.\n"
            f"Syllabus Content: {syllabus_text}\n"
            f"Question: {question}\n"
            "Answer:\n"
        )
        try:
            answer = chain.invoke(prompt).strip()
        except Exception as e:
            # Fall back to web search if the LLM fails.
            logging.error(f"Error generating answer, falling back to web search: {e}")
            answer = search_answers_online(question)
        # Number by question count, not raw line index, so blank lines
        # do not leave gaps in the numbering.
        answers.append(f"Answer {len(answers) + 1}: {answer}")
    return "\n".join(answers)
# Function to search answers online
def search_answers_online(question):
    """Look up *question* on Google and return up to three result snippets.

    Args:
        question: Free-text query.

    Returns:
        The text of the first three ``div.BNeawe`` elements joined by
        newlines, or ``"No online answer found."`` if the request fails or
        no snippet is present.
    """
    no_answer = "No online answer found."
    try:
        response = requests.get(
            "https://www.google.com/search",
            # Let requests URL-encode the query; characters such as "&", "#"
            # or spaces would otherwise corrupt a hand-built URL.
            params={"q": question},
            headers={"User-Agent": "Mozilla/5.0"},
            # Without a timeout a stalled connection blocks the app forever.
            timeout=10,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        snippets = soup.find_all("div", class_="BNeawe")
        text = "\n".join(snippet.get_text() for snippet in snippets[:3])
        return text or no_answer
    except Exception as e:
        logging.error(f"Error fetching online answers: {e}")
        return no_answer
# Streamlit UI
st.title("AI-Powered Exam Generator")

# Tabs for navigation.
# The emoji prefixes had been corrupted by a wrong text encoding and showed up
# as a stray Greek letter. Only the last one (light bulb) could be recovered
# from the damaged bytes; the first three are best-fit replacements.
tab1, tab2, tab3, tab4 = st.tabs(
    ["📂 Upload Files", "🔍 Preview Content", "📝 Generate Questions", "💡 Generate Answers"]
)
# Upload files
with tab1:
    st.header("Upload Files")
    uploaded_files = st.file_uploader(
        "Upload your syllabus (PDF, DOCX, TXT, Images)",
        type=["pdf", "docx", "txt", "png", "jpg", "jpeg"],
        accept_multiple_files=True
    )
    ocr_lang = st.selectbox("Select OCR Language", ["eng", "spa", "fra", "deu", "ita"])
    if uploaded_files:
        # Streamlit re-runs the whole script on every widget interaction.
        # Only redo extraction and OCR when the uploads or OCR language change.
        upload_signature = (tuple((f.name, f.size) for f in uploaded_files), ocr_lang)
        if st.session_state.get("upload_signature") != upload_signature:
            with st.spinner("Processing files..."):
                st.session_state["syllabus_text"] = process_files(uploaded_files, lang=ocr_lang)
            # The preview tab reads st.session_state["images"], which was
            # never populated; keep the uploaded image files there.
            st.session_state["images"] = [
                f for f in uploaded_files
                if f.name.lower().endswith((".png", ".jpg", ".jpeg"))
            ]
            st.session_state["upload_signature"] = upload_signature
        st.success("Files processed successfully!")
# Preview content
with tab2:
    st.header("Preview Syllabus Content")
    if "syllabus_text" not in st.session_state:
        # Nothing has been extracted yet.
        st.warning("No content available. Upload files first.")
    else:
        st.text_area("Extracted Content", st.session_state["syllabus_text"], height=300)
        # Show stored images, if the session holds any.
        for preview_image in st.session_state.get("images") or []:
            st.image(preview_image, caption="Uploaded Image")
# Generate questions and answers
with tab3:
    st.header("Generate Questions and Answers")
    question_type = st.selectbox("Select Question Type", ["MCQs", "Short Questions", "Long Questions", "Fill-in-the-Blank", "Case Study"])
    num_questions = st.text_input("Total Number of Questions")
    difficulty_levels = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]
    difficulty = {level: st.slider(level, 0, 5, 1) for level in difficulty_levels}
    prompt_template = st.text_area(
        "Edit Prompt Template",
        """
Generate {num_questions} {question_type} questions from the syllabus content below.
Syllabus Content: {syllabus_text}
Difficulty Levels:
- Remember: {Remember}
- Understand: {Understand}
- Apply: {Apply}
- Analyze: {Analyze}
- Evaluate: {Evaluate}
- Create: {Create}
""",
        height=200
    )

    # Always render the button. Previously it only appeared once the count
    # field held digits, with no hint about why it was missing.
    if st.button("Generate Questions and Answers"):
        syllabus_for_questions = st.session_state.get("syllabus_text", "")
        if not (num_questions.isdigit() and int(num_questions) > 0):
            st.error("Enter a positive whole number of questions.")
        elif not syllabus_for_questions.strip():
            st.warning("No content available. Upload files first.")
        else:
            with st.spinner("Generating questions and answers..."):
                generated_questions = generate_questions(
                    question_type,
                    syllabus_for_questions,
                    int(num_questions),
                    difficulty,
                    prompt_template,
                )
                if generated_questions:
                    st.session_state["questions"] = generated_questions
                    st.session_state["answers"] = generate_answers(
                        generated_questions, syllabus_for_questions
                    )
            if not generated_questions:
                st.error("Question generation failed. Check the logs and try again.")

    # Render results from session state, not from inside the button branch.
    # Clicking a download button triggers a rerun in which st.button() is
    # False, which used to make the results and download buttons disappear.
    if st.session_state.get("questions"):
        st.text_area(
            "Generated Questions",
            st.session_state["questions"],
            height=300,
            key="tab3_questions_output",
        )
        st.text_area(
            "Generated Answers",
            st.session_state.get("answers", ""),
            height=300,
            key="tab3_answers_output",
        )
        # Download questions and answers
        st.download_button(
            "Download Questions",
            st.session_state["questions"],
            file_name="questions.txt",
            key="tab3_download_questions",
        )
        st.download_button(
            "Download Answers",
            st.session_state.get("answers", ""),
            file_name="answers.txt",
            key="tab3_download_answers",
        )
# Generate answers
with tab4:
    st.header("Generate Answers (Optional)")
    if "questions" in st.session_state:
        if st.button("Generate Answers"):
            with st.spinner("Generating answers..."):
                st.session_state["answers"] = generate_answers(
                    st.session_state["questions"],
                    st.session_state.get("syllabus_text", ""),
                )
        # Render from session state so the answers and the download button
        # survive the rerun that a download click triggers. Explicit keys
        # keep these widgets distinct from same-labelled ones in other tabs.
        if st.session_state.get("answers"):
            st.text_area(
                "Generated Answers",
                st.session_state["answers"],
                height=300,
                key="tab4_answers_output",
            )
            # Download answers
            st.download_button(
                "Download Answers",
                st.session_state["answers"],
                file_name="answers.txt",
                key="tab4_download_answers",
            )
    else:
        st.info("Generate questions first to enable answer generation.")