Spaces:
Sleeping
Sleeping
File size: 5,751 Bytes
f4947f0 24d7505 f4947f0 24d7505 f4947f0 3da7b14 f4947f0 24d7505 3da7b14 24d7505 3da7b14 24d7505 3da7b14 24d7505 f4947f0 3da7b14 f4947f0 36325cc f4947f0 36325cc 24d7505 36325cc 3da7b14 f4947f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer, util
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import os
import random
# Load Hugging Face model (e.g., FLAN-T5)
@st.cache_resource
def load_model():
    """Create the Hugging Face text2text-generation pipeline for FLAN-T5.

    The function is decorated with ``st.cache_resource``, so Streamlit
    caches the returned pipeline object.

    Returns:
        The pipeline built for the ``google/flan-t5-base`` checkpoint.
    """
    task = "text2text-generation"
    checkpoint = "google/flan-t5-base"
    return pipeline(task, model=checkpoint)
# Extract text from PDF files in the dataset folder
def extract_text_from_pdfs(folder_path):
    """Extract the text of every PDF file located directly in ``folder_path``.

    Only regular files whose name ends in ``.pdf`` (case-insensitive) are
    read; sub-directories are not searched.

    Args:
        folder_path: Path of the directory to scan.

    Returns:
        A list with one string per PDF, in sorted filename order. Each string
        is the text of the document's pages joined by single spaces.

    Raises:
        OSError: If ``folder_path`` cannot be listed (for example, it does
            not exist).
    """
    pdf_texts = []
    # os.listdir() returns entries in arbitrary order; sort so the result
    # (and any index derived from it) is stable between runs.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(".pdf"):
            continue
        pdf_path = os.path.join(folder_path, filename)
        if not os.path.isfile(pdf_path):
            # A directory that merely has a ".pdf" suffix is not a document.
            continue
        reader = PdfReader(pdf_path)
        # Defensive: substitute "" for a falsy extraction result so that
        # str.join() never receives a non-string item.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        pdf_texts.append(" ".join(page_texts))
    return pdf_texts
# Embed PDF texts for similarity matching
@st.cache_resource
def create_embeddings(texts):
    """Encode ``texts`` with the ``all-MiniLM-L6-v2`` sentence-transformer.

    The function is decorated with ``st.cache_resource``, so Streamlit
    caches the returned objects.

    Args:
        texts: The texts to embed.

    Returns:
        A ``(embedder, embeddings)`` tuple: the loaded ``SentenceTransformer``
        and the result of encoding ``texts`` with ``convert_to_tensor=True``.
    """
    model = SentenceTransformer('all-MiniLM-L6-v2')
    return model, model.encode(texts, convert_to_tensor=True)
# Query matching function
def get_relevant_content(query, texts, embedder, embeddings):
    """Return the entry of ``texts`` whose embedding is most similar to ``query``.

    Args:
        query: The user's question.
        texts: Candidate texts to choose from.
        embedder: Object whose ``encode(query, convert_to_tensor=True)``
            produces the query embedding.
        embeddings: Precomputed embeddings of ``texts``.
            NOTE(review): assumes ``embeddings[i]`` belongs to ``texts[i]``,
            as when both come from ``create_embeddings(texts)`` - confirm
            for other callers.

    Returns:
        The best-matching text by cosine similarity, or ``""`` when ``texts``
        is empty.
    """
    if not texts:
        # Nothing to match against: taking argmax of an empty score tensor
        # or indexing an empty list would raise, so return an empty context.
        return ""
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    scores = util.pytorch_cos_sim(query_embedding, embeddings)
    best_idx = scores.argmax().item()
    return texts[best_idx]
# Email functionality
def send_email(to_email, subject, body):
    """Send a plain-text email through ``smtp.gmail.com`` using STARTTLS.

    The sender address and password are read from the ``SENDER_EMAIL`` and
    ``SENDER_PASSWORD`` environment variables so that real credentials do not
    have to be written into the source file. When a variable is unset the
    original placeholder value is used.

    Args:
        to_email: Recipient address.
        subject: Subject line of the message.
        body: Plain-text message body.

    Returns:
        ``True`` if the message was handed to the SMTP server, ``False`` if
        any error occurred (the error is also shown with ``st.error``).
    """
    try:
        sender_email = os.environ.get("SENDER_EMAIL", "your-email@example.com")
        sender_password = os.environ.get("SENDER_PASSWORD", "your-email-password")
        smtp_server = "smtp.gmail.com"
        smtp_port = 587

        msg = MIMEMultipart()
        msg['From'] = sender_email
        msg['To'] = to_email
        msg['Subject'] = subject
        msg.attach(MIMEText(body, 'plain'))

        # A timeout keeps an unreachable server from blocking the app forever.
        with smtplib.SMTP(smtp_server, smtp_port, timeout=30) as server:
            server.starttls()
            server.login(sender_email, sender_password)
            server.send_message(msg)
        return True
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        st.error(f"Failed to send email: {e}")
        return False
# CAPTCHA generation
def generate_captcha():
    """Create a single-digit addition challenge.

    Returns:
        A ``(first, second, total)`` tuple where both operands are random
        integers from 1 to 9 inclusive and ``total`` is their sum.
    """
    operands = [random.randint(1, 9) for _ in range(2)]
    first, second = operands
    return first, second, first + second
# Streamlit UI
def main():
    """Render the Streamlit UI: profile form first, then the question box.

    Flow:
      1. On the first run of a session, extract the PDF texts from the
         dataset folder and embed them; results are kept in
         ``st.session_state``.
      2. Until a profile has been submitted, show the profile form with an
         arithmetic CAPTCHA. A valid submission is emailed via ``send_email``.
      3. Once a profile is submitted, show a question box. The question is
         matched to the most similar PDF text and both are passed to the
         text-generation model.
    """
    st.title("Educational Assistant Chatbot")
    st.markdown("Welcome! Let's create your student profile to provide tailored guidance.")

    # Hardcoded dataset folder and email
    dataset_folder = "./pdf_dataset"
    email_to_send = "application@aspireec.pk"

    # Load dataset on startup
    if "pdf_texts" not in st.session_state:
        st.session_state.pdf_texts = extract_text_from_pdfs(dataset_folder)
        st.session_state.embedder, st.session_state.embeddings = create_embeddings(
            st.session_state.pdf_texts
        )

    # Initialize session state for profile submission
    if "profile_submitted" not in st.session_state:
        st.session_state.profile_submitted = False

    # Show profile creation form only if profile is not submitted
    if not st.session_state.profile_submitted:
        st.markdown("### Create a Student Profile")
        name = st.text_input("Name (required):")
        email = st.text_input("Email (required):")
        contact_number = st.text_input("Contact Number (optional):")
        study_level = st.selectbox("Level of Study:", ["Undergraduate", "Postgraduate", "PhD"])
        field_of_interest = st.text_input("Field of Interest:")
        career_goal = st.text_area("Career Goals:")
        visa_query = st.text_area("Visa Concerns or Questions:")

        # CAPTCHA
        # Streamlit re-executes this function on every interaction, so the
        # operands must live in session state; as plain locals they would be
        # unbound on every run after the one that generated the challenge.
        if (
            "captcha_result" not in st.session_state
            or "captcha_operands" not in st.session_state
        ):
            num1, num2, captcha_result = generate_captcha()
            st.session_state.captcha_operands = (num1, num2)
            st.session_state.captcha_result = captcha_result
        num1, num2 = st.session_state.captcha_operands
        st.markdown(f"**CAPTCHA: {num1} + {num2} = ?**")
        captcha_input = st.text_input("Enter CAPTCHA Result:")

        if st.button("Submit Profile"):
            # Empty or non-numeric input counts as a wrong answer instead of
            # raising ValueError.
            try:
                captcha_answer = int(captcha_input.strip())
            except ValueError:
                captcha_answer = None

            if not name or not email:
                st.error("Name and Email are required fields.")
            elif captcha_answer != st.session_state.captcha_result:
                st.error("Invalid CAPTCHA. Please try again.")
            else:
                profile = (
                    f"\nName: {name}\n"
                    f"Email: {email}\n"
                    f"Contact Number: {contact_number}\n"
                    f"Level of Study: {study_level}\n"
                    f"Field of Interest: {field_of_interest}\n"
                    f"Career Goals: {career_goal}\n"
                    f"Visa Queries: {visa_query}\n"
                )
                email_sent = send_email(email_to_send, "New Student Profile Submission", profile)
                if email_sent:
                    st.session_state.profile_submitted = True
                    st.success(f"Profile submitted successfully to {email_to_send}!")
                else:
                    st.error("Failed to submit the profile. Please try again later.")

    # Show question input only if profile is submitted
    if st.session_state.profile_submitted:
        st.markdown("### Ask a Question")
        user_query = st.text_input("Your Query:")
        if st.button("Ask"):
            if "embedder" not in st.session_state or "pdf_texts" not in st.session_state:
                st.error("Dataset is not loaded. Please restart the application.")
            elif not user_query.strip():
                # Do not run retrieval or generation on a blank question.
                st.warning("Please enter a question before asking.")
            else:
                relevant_content = get_relevant_content(
                    user_query,
                    st.session_state.pdf_texts,
                    st.session_state.embedder,
                    st.session_state.embeddings,
                )
                model = load_model()
                response = model(
                    f"Question: {user_query} Context: {relevant_content}",
                    max_length=200,
                )
                st.success(response[0]['generated_text'])
# Script entry point: build the UI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|