Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import pipeline | |
| from PyPDF2 import PdfReader | |
| from sentence_transformers import SentenceTransformer, util | |
| import smtplib | |
| from email.mime.text import MIMEText | |
| from email.mime.multipart import MIMEMultipart | |
| import os | |
| import random | |
| # Load Hugging Face model (e.g., FLAN-T5) | |
def load_model():
    """Return the FLAN-T5 text2text-generation pipeline, building it at most once per session.

    Streamlit re-executes the script on every widget interaction, and this
    function is called each time the user asks a question.  Constructing the
    pipeline on every call would reload the model each time, so the pipeline
    is stored in ``st.session_state`` (the same mechanism this app uses for
    its embeddings) and reused on later calls.

    Returns:
        The object created by
        ``pipeline("text2text-generation", model="google/flan-t5-base")``.
    """
    if "qa_pipeline" not in st.session_state:
        st.session_state["qa_pipeline"] = pipeline(
            "text2text-generation", model="google/flan-t5-base"
        )
    return st.session_state["qa_pipeline"]
| # Extract text from PDF files in the dataset folder | |
def extract_text_from_pdfs(folder_path):
    """Extract the text of every PDF file located directly inside *folder_path*.

    Args:
        folder_path: Directory to scan.  Sub-directories are not descended into.

    Returns:
        A list with one string per PDF file, in sorted filename order.  Each
        string is the text of that file's pages joined with single spaces.

    Raises:
        OSError: If *folder_path* cannot be listed (for example, it does not
            exist).  This is deliberately not swallowed so a misconfigured
            dataset folder is noticed immediately.
    """
    pdf_texts = []
    # os.listdir() returns entries in arbitrary order; sort for reproducible results.
    for filename in sorted(os.listdir(folder_path)):
        # Compare the extension case-insensitively so "REPORT.PDF" is not skipped.
        if not filename.lower().endswith(".pdf"):
            continue
        pdf_path = os.path.join(folder_path, filename)
        # A directory that merely happens to be named "*.pdf" is not a document.
        if not os.path.isfile(pdf_path):
            continue
        reader = PdfReader(pdf_path)
        # Guard against a page yielding no text (None) so join() never sees a non-string.
        pdf_texts.append(" ".join(page.extract_text() or "" for page in reader.pages))
    return pdf_texts
| # Embed PDF texts for similarity matching | |
def create_embeddings(texts):
    """Encode *texts* with the 'all-MiniLM-L6-v2' sentence-transformer model.

    Args:
        texts: The strings to embed.

    Returns:
        A ``(embedder, embeddings)`` tuple: the ``SentenceTransformer``
        instance that was created, and the result of encoding *texts* with it
        using ``convert_to_tensor=True``.
    """
    model = SentenceTransformer('all-MiniLM-L6-v2')
    # The model is returned alongside the vectors so callers can encode queries with it.
    return model, model.encode(texts, convert_to_tensor=True)
| # Query matching function | |
def get_relevant_content(query, texts, embedder, embeddings):
    """Return the entry of *texts* whose embedding is most similar to *query*.

    Args:
        query: The user's question.
        texts: Candidate documents.
        embedder: Object whose ``encode(query, convert_to_tensor=True)`` produces
            the query embedding.
        embeddings: Pre-computed embeddings of *texts*; assumed to be in the
            same order as *texts* (verify against the caller).

    Returns:
        The best-matching text by cosine similarity, or ``""`` when *texts* is
        empty.
    """
    # With no documents there is nothing to rank; encoding and argmax over an
    # empty corpus would raise instead of yielding a usable (empty) context.
    if not texts:
        return ""
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    scores = util.pytorch_cos_sim(query_embedding, embeddings)
    best_idx = scores.argmax().item()
    return texts[best_idx]
| # Email functionality | |
def send_email(to_email, subject, body):
    """Send a plain-text email through Gmail's SMTP server using STARTTLS.

    The sender address and password are read from the ``SENDER_EMAIL`` and
    ``SENDER_PASSWORD`` environment variables so real credentials never have
    to be written into the source file.  When a variable is unset, the
    original placeholder value is used.

    Args:
        to_email: Recipient address, used for the ``To`` header.
        subject: Subject line.
        body: Plain-text message body.

    Returns:
        True if the message was handed to the server; False if anything
        failed, in which case the error is shown via ``st.error``.
    """
    sender_email = os.environ.get("SENDER_EMAIL", "your-email@example.com")
    sender_password = os.environ.get("SENDER_PASSWORD", "your-email-password")
    smtp_server = "smtp.gmail.com"
    smtp_port = 587
    try:
        msg = MIMEMultipart()
        msg['From'] = sender_email
        msg['To'] = to_email
        msg['Subject'] = subject
        msg.attach(MIMEText(body, 'plain'))
        # The timeout keeps an unreachable mail server from blocking the app indefinitely.
        with smtplib.SMTP(smtp_server, smtp_port, timeout=30) as server:
            server.starttls()
            server.login(sender_email, sender_password)
            server.send_message(msg)
        return True
    except Exception as e:
        # Deliberate best-effort boundary: report the failure in the UI rather than crash.
        st.error(f"Failed to send email: {e}")
        return False
| # CAPTCHA generation | |
def generate_captcha():
    """Create a simple addition challenge.

    Returns:
        A ``(num1, num2, answer)`` tuple where both operands are random
        integers from 1 to 9 inclusive and ``answer`` is their sum.
    """
    left, right = (random.randint(1, 9) for _ in range(2))
    return left, right, left + right
| # Streamlit UI | |
def main():
    """Render the Streamlit UI: a profile form first, then a question box.

    Flow:
        1. On the first run of a session, extract the PDF texts from
           ``./pdf_dataset`` and embed them; the results live in
           ``st.session_state``.
        2. Until a profile has been submitted, show the profile form guarded
           by an addition CAPTCHA.  A valid submission is emailed via
           ``send_email``.
        3. Once a profile has been submitted, show the question box.  The
           best-matching PDF text is passed as context to the model returned
           by ``load_model``.
    """
    st.title("Educational Assistant Chatbot")
    st.markdown("Welcome! Let's create your student profile to provide tailored guidance.")

    # Hardcoded dataset folder and email
    dataset_folder = "./pdf_dataset"
    email_to_send = "application@aspireec.pk"

    # Load dataset on startup.  Everything is computed before anything is
    # stored, so a failure while embedding cannot leave the session with
    # texts but no embedder.
    if "pdf_texts" not in st.session_state:
        pdf_texts = extract_text_from_pdfs(dataset_folder)
        embedder, embeddings = create_embeddings(pdf_texts)
        st.session_state.pdf_texts = pdf_texts
        st.session_state.embedder = embedder
        st.session_state.embeddings = embeddings

    # Initialize session state for profile submission
    if "profile_submitted" not in st.session_state:
        st.session_state.profile_submitted = False

    # Show profile creation form only if profile is not submitted
    if not st.session_state.profile_submitted:
        st.markdown("### Create a Student Profile")
        name = st.text_input("Name (required):")
        email = st.text_input("Email (required):")
        contact_number = st.text_input("Contact Number (optional):")
        study_level = st.selectbox("Level of Study:", ["Undergraduate", "Postgraduate", "PhD"])
        field_of_interest = st.text_input("Field of Interest:")
        career_goal = st.text_area("Career Goals:")
        visa_query = st.text_area("Visa Concerns or Questions:")

        # CAPTCHA.  The operands must be kept in session state as well as the
        # answer: the script is re-run on every interaction, and locals bound
        # only on the first run would be undefined when the prompt is drawn again.
        if "captcha_operands" not in st.session_state or "captcha_result" not in st.session_state:
            first, second, answer = generate_captcha()
            st.session_state.captcha_operands = (first, second)
            st.session_state.captcha_result = answer
        num1, num2 = st.session_state.captcha_operands
        st.markdown(f"**CAPTCHA: {num1} + {num2} = ?**")
        captcha_input = st.text_input("Enter CAPTCHA Result:")

        if st.button("Submit Profile"):
            captcha_answer = _parse_captcha_answer(captcha_input)
            if not name or not email:
                st.error("Name and Email are required fields.")
            elif captcha_answer != st.session_state.captcha_result:
                # Also reached for blank / non-numeric input (captcha_answer is None).
                st.error("Invalid CAPTCHA. Please try again.")
            else:
                profile = (
                    "\n"
                    f"Name: {name}\n"
                    f"Email: {email}\n"
                    f"Contact Number: {contact_number}\n"
                    f"Level of Study: {study_level}\n"
                    f"Field of Interest: {field_of_interest}\n"
                    f"Career Goals: {career_goal}\n"
                    f"Visa Queries: {visa_query}\n"
                )
                email_sent = send_email(email_to_send, "New Student Profile Submission", profile)
                if email_sent:
                    st.session_state.profile_submitted = True
                    st.success(f"Profile submitted successfully to {email_to_send}!")
                else:
                    st.error("Failed to submit the profile. Please try again later.")

    # Show question input only if profile is submitted
    if st.session_state.profile_submitted:
        st.markdown("### Ask a Question")
        user_query = st.text_input("Your Query:")
        if st.button("Ask"):
            dataset_keys = ("pdf_texts", "embedder", "embeddings")
            if any(key not in st.session_state for key in dataset_keys):
                st.error("Dataset is not loaded. Please restart the application.")
            elif not user_query.strip():
                # Nothing to answer; avoid running the model on an empty question.
                st.warning("Please enter a question.")
            else:
                relevant_content = get_relevant_content(
                    user_query,
                    st.session_state.pdf_texts,
                    st.session_state.embedder,
                    st.session_state.embeddings,
                )
                model = load_model()
                # The context is the full text of a PDF and can be far longer
                # than the model accepts, so let the tokenizer truncate it.
                response = model(
                    f"Question: {user_query} Context: {relevant_content}",
                    max_length=200,
                    truncation=True,
                )
                st.success(response[0]['generated_text'])


def _parse_captcha_answer(raw_answer):
    """Return *raw_answer* as an int, or None when it is not a whole number."""
    try:
        return int(raw_answer)
    except (TypeError, ValueError):
        return None
# Entry-point guard: main() is called only when this file is executed as a script, not when it is imported.
| if __name__ == "__main__": | |
| main() | |