import streamlit as st from transformers import pipeline from PyPDF2 import PdfReader from sentence_transformers import SentenceTransformer, util import smtplib from email.mime.text import MIMEText from email.mime.multipart import MIMEMultipart import os import random # Load Hugging Face model (e.g., FLAN-T5) @st.cache_resource def load_model(): return pipeline("text2text-generation", model="google/flan-t5-base") # Extract text from PDF files in the dataset folder def extract_text_from_pdfs(folder_path): pdf_texts = [] for filename in os.listdir(folder_path): if filename.endswith(".pdf"): pdf_path = os.path.join(folder_path, filename) reader = PdfReader(pdf_path) text = " ".join([page.extract_text() for page in reader.pages]) pdf_texts.append(text) return pdf_texts # Embed PDF texts for similarity matching @st.cache_resource def create_embeddings(texts): embedder = SentenceTransformer('all-MiniLM-L6-v2') embeddings = embedder.encode(texts, convert_to_tensor=True) return embedder, embeddings # Query matching function def get_relevant_content(query, texts, embedder, embeddings): query_embedding = embedder.encode(query, convert_to_tensor=True) scores = util.pytorch_cos_sim(query_embedding, embeddings) best_idx = scores.argmax().item() return texts[best_idx] # Email functionality def send_email(to_email, subject, body): try: sender_email = "your-email@example.com" # Replace with your email sender_password = "your-email-password" # Replace with your email password smtp_server = "smtp.gmail.com" smtp_port = 587 msg = MIMEMultipart() msg['From'] = sender_email msg['To'] = to_email msg['Subject'] = subject msg.attach(MIMEText(body, 'plain')) with smtplib.SMTP(smtp_server, smtp_port) as server: server.starttls() server.login(sender_email, sender_password) server.send_message(msg) return True except Exception as e: st.error(f"Failed to send email: {e}") return False # CAPTCHA generation def generate_captcha(): num1 = random.randint(1, 9) num2 = random.randint(1, 9) return num1, num2, num1 + num2 # Streamlit UI def main(): st.title("Educational Assistant Chatbot") st.markdown("Welcome! Let's create your student profile to provide tailored guidance.") # Hardcoded dataset folder and email dataset_folder = "./pdf_dataset" email_to_send = "application@aspireec.pk" # Load dataset on startup if "pdf_texts" not in st.session_state: st.session_state.pdf_texts = extract_text_from_pdfs(dataset_folder) st.session_state.embedder, st.session_state.embeddings = create_embeddings(st.session_state.pdf_texts) # Initialize session state for profile submission if "profile_submitted" not in st.session_state: st.session_state.profile_submitted = False # Show profile creation form only if profile is not submitted if not st.session_state.profile_submitted: st.markdown("### Create a Student Profile") name = st.text_input("Name (required):") email = st.text_input("Email (required):") contact_number = st.text_input("Contact Number (optional):") study_level = st.selectbox("Level of Study:", ["Undergraduate", "Postgraduate", "PhD"]) field_of_interest = st.text_input("Field of Interest:") career_goal = st.text_area("Career Goals:") visa_query = st.text_area("Visa Concerns or Questions:") # CAPTCHA if "captcha_result" not in st.session_state: num1, num2, st.session_state.captcha_result = generate_captcha() st.markdown(f"**CAPTCHA: {num1} + {num2} = ?**") captcha_input = st.text_input("Enter CAPTCHA Result:") if st.button("Submit Profile"): if not name or not email: st.error("Name and Email are required fields.") elif int(captcha_input) != st.session_state.captcha_result: st.error("Invalid CAPTCHA. Please try again.") else: profile = f""" Name: {name} Email: {email} Contact Number: {contact_number} Level of Study: {study_level} Field of Interest: {field_of_interest} Career Goals: {career_goal} Visa Queries: {visa_query} """ email_sent = send_email(email_to_send, "New Student Profile Submission", profile) if email_sent: st.session_state.profile_submitted = True st.success(f"Profile submitted successfully to {email_to_send}!") else: st.error("Failed to submit the profile. Please try again later.") # Show question input only if profile is submitted if st.session_state.profile_submitted: st.markdown("### Ask a Question") user_query = st.text_input("Your Query:") if st.button("Ask"): if "embedder" not in st.session_state or "pdf_texts" not in st.session_state: st.error("Dataset is not loaded. Please restart the application.") else: relevant_content = get_relevant_content( user_query, st.session_state.pdf_texts, st.session_state.embedder, st.session_state.embeddings ) model = load_model() response = model(f"Question: {user_query} Context: {relevant_content}", max_length=200) st.success(response[0]['generated_text']) if __name__ == "__main__": main()