File size: 5,751 Bytes
f4947f0
 
 
 
24d7505
 
 
f4947f0
24d7505
f4947f0
3da7b14
f4947f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24d7505
 
 
 
 
3da7b14
24d7505
3da7b14
24d7505
 
 
 
 
3da7b14
24d7505
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4947f0
 
 
3da7b14
f4947f0
36325cc
 
 
 
 
 
 
f4947f0
36325cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24d7505
36325cc
 
 
 
 
 
3da7b14
f4947f0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer, util
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import os
import random

# Load Hugging Face model (e.g., FLAN-T5)
@st.cache_resource
def load_model():
    """Build the FLAN-T5 text2text-generation pipeline used to answer queries.

    Wrapped in ``st.cache_resource`` so the pipeline object is built once and
    reused instead of being re-created on every call.

    Returns:
        The Hugging Face pipeline for ``google/flan-t5-base``.
    """
    generator = pipeline("text2text-generation", model="google/flan-t5-base")
    return generator

# Extract text from PDF files in the dataset folder
def extract_text_from_pdfs(folder_path):
    """Read every PDF in *folder_path* and return one text string per file.

    Files are matched by a case-insensitive ``.pdf`` extension and visited in
    sorted filename order, so the index of a document in the returned list is
    stable across runs (``os.listdir`` gives no ordering guarantee).

    Args:
        folder_path: Directory that holds the PDF files.

    Returns:
        A list of strings, one per PDF, each being the page texts of that file
        joined with single spaces. Empty when the folder has no PDFs.

    Raises:
        OSError: If *folder_path* cannot be listed (e.g. it does not exist).
    """
    pdf_texts = []
    for filename in sorted(os.listdir(folder_path)):
        # Accept "report.PDF" as well as "report.pdf".
        if not filename.lower().endswith(".pdf"):
            continue
        reader = PdfReader(os.path.join(folder_path, filename))
        # Defensive: treat a page that yields no text (None) as an empty
        # string so str.join never sees a non-string item.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        pdf_texts.append(" ".join(page_texts))
    return pdf_texts

# Embed PDF texts for similarity matching
@st.cache_resource
def create_embeddings(texts):
    """Encode *texts* with the ``all-MiniLM-L6-v2`` sentence-transformer.

    Args:
        texts: The document strings to embed.

    Returns:
        A ``(embedder, embeddings)`` pair: the loaded SentenceTransformer and
        the tensor it produced for *texts* (``convert_to_tensor=True``).
    """
    model = SentenceTransformer('all-MiniLM-L6-v2')
    return model, model.encode(texts, convert_to_tensor=True)

# Query matching function
def get_relevant_content(query, texts, embedder, embeddings):
    """Return the entry of *texts* most similar to *query*.

    Similarity is the cosine similarity between the query embedding and each
    precomputed document embedding; the highest-scoring document wins.

    Args:
        query: The user's question.
        texts: Document strings, in the same order as *embeddings*.
        embedder: Object exposing ``encode(text, convert_to_tensor=True)``.
        embeddings: Embeddings of *texts* as produced by the same embedder.

    Returns:
        The best-matching document text, or ``""`` when *texts* is empty
        (there is nothing to rank, and indexing would otherwise fail).
    """
    # With no documents there is no best match; bail out before scoring.
    if not texts:
        return ""
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    scores = util.pytorch_cos_sim(query_embedding, embeddings)
    # argmax over the score matrix gives the index of the best document.
    best_idx = scores.argmax().item()
    return texts[best_idx]

# Email functionality
def send_email(to_email, subject, body):
    """Send a plain-text email over SMTP with STARTTLS.

    Connection settings are read from the environment so that credentials do
    not have to be committed to source control. Each variable falls back to
    the original placeholder value when unset:

    * ``SMTP_SENDER_EMAIL``    (default ``your-email@example.com``)
    * ``SMTP_SENDER_PASSWORD`` (default ``your-email-password``)
    * ``SMTP_SERVER``          (default ``smtp.gmail.com``)
    * ``SMTP_PORT``            (default ``587``)

    Args:
        to_email: Recipient address.
        subject: Subject line.
        body: Plain-text message body.

    Returns:
        True if the message was handed to the server, False on any failure.
        Failures are reported in the UI via ``st.error`` rather than raised.
    """
    try:
        sender_email = os.environ.get("SMTP_SENDER_EMAIL", "your-email@example.com")
        sender_password = os.environ.get("SMTP_SENDER_PASSWORD", "your-email-password")
        smtp_server = os.environ.get("SMTP_SERVER", "smtp.gmail.com")
        smtp_port = int(os.environ.get("SMTP_PORT", "587"))

        msg = MIMEMultipart()
        msg['From'] = sender_email
        msg['To'] = to_email
        msg['Subject'] = subject
        msg.attach(MIMEText(body, 'plain'))

        # A timeout keeps an unreachable mail server from hanging the app.
        with smtplib.SMTP(smtp_server, smtp_port, timeout=30) as server:
            server.starttls()
            server.login(sender_email, sender_password)
            server.send_message(msg)
        return True
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and the caller only
        # needs a success flag plus a visible error message.
        st.error(f"Failed to send email: {e}")
        return False

# CAPTCHA generation
def generate_captcha():
    """Create a single-digit addition challenge.

    Returns:
        A tuple ``(first, second, answer)`` where both operands are random
        integers in 1..9 and ``answer`` is their sum.
    """
    first, second = (random.randint(1, 9) for _ in range(2))
    return first, second, first + second

# Streamlit UI
def main():
    """Render the Streamlit UI: a profile form first, then the question box.

    Flow:
      1. Load the PDF dataset and its embeddings once per session.
      2. Until a profile has been submitted, show the profile form guarded by
         a simple arithmetic CAPTCHA; on success the profile is emailed.
      3. Once submitted, show a query box that answers questions using the
         most relevant PDF as context for the language model.

    Streamlit re-executes this function on every interaction, so anything
    that must survive a rerun (dataset, CAPTCHA operands, submission flag) is
    kept in ``st.session_state``.
    """
    st.title("Educational Assistant Chatbot")
    st.markdown("Welcome! Let's create your student profile to provide tailored guidance.")

    # Hardcoded dataset folder and email
    dataset_folder = "./pdf_dataset"
    email_to_send = "application@aspireec.pk"

    # Load dataset on startup. Everything is computed into locals first so a
    # failure while embedding cannot leave the session half-initialised.
    if "pdf_texts" not in st.session_state or "embeddings" not in st.session_state:
        pdf_texts = extract_text_from_pdfs(dataset_folder)
        embedder, embeddings = create_embeddings(pdf_texts)
        st.session_state.pdf_texts = pdf_texts
        st.session_state.embedder = embedder
        st.session_state.embeddings = embeddings

    # Initialize session state for profile submission
    if "profile_submitted" not in st.session_state:
        st.session_state.profile_submitted = False

    # Show profile creation form only if profile is not submitted
    if not st.session_state.profile_submitted:
        st.markdown("### Create a Student Profile")
        name = st.text_input("Name (required):")
        email = st.text_input("Email (required):")
        contact_number = st.text_input("Contact Number (optional):")
        study_level = st.selectbox("Level of Study:", ["Undergraduate", "Postgraduate", "PhD"])
        field_of_interest = st.text_input("Field of Interest:")
        career_goal = st.text_area("Career Goals:")
        visa_query = st.text_area("Visa Concerns or Questions:")

        # CAPTCHA: the operands are stored alongside the answer because this
        # function reruns on every widget interaction; keeping only the
        # answer would leave the operands undefined on the second run.
        if "captcha_operands" not in st.session_state or "captcha_result" not in st.session_state:
            num1, num2, captcha_result = generate_captcha()
            st.session_state.captcha_operands = (num1, num2)
            st.session_state.captcha_result = captcha_result
        num1, num2 = st.session_state.captcha_operands
        st.markdown(f"**CAPTCHA: {num1} + {num2} = ?**")
        captcha_input = st.text_input("Enter CAPTCHA Result:")

        if st.button("Submit Profile"):
            # Empty or non-numeric input is simply a wrong answer, not a crash.
            try:
                captcha_answer = int(captcha_input.strip())
            except ValueError:
                captcha_answer = None

            if not name or not email:
                st.error("Name and Email are required fields.")
            elif captcha_answer != st.session_state.captcha_result:
                st.error("Invalid CAPTCHA. Please try again.")
            else:
                profile = f"""
                Name: {name}
                Email: {email}
                Contact Number: {contact_number}
                Level of Study: {study_level}
                Field of Interest: {field_of_interest}
                Career Goals: {career_goal}
                Visa Queries: {visa_query}
                """
                email_sent = send_email(email_to_send, "New Student Profile Submission", profile)
                if email_sent:
                    st.session_state.profile_submitted = True
                    st.success(f"Profile submitted successfully to {email_to_send}!")
                else:
                    st.error("Failed to submit the profile. Please try again later.")

    # Show question input only if profile is submitted
    if st.session_state.profile_submitted:
        st.markdown("### Ask a Question")
        user_query = st.text_input("Your Query:")
        if st.button("Ask"):
            if "embedder" not in st.session_state or "pdf_texts" not in st.session_state:
                st.error("Dataset is not loaded. Please restart the application.")
            elif not user_query.strip():
                # Nothing to answer; avoid running the model on an empty prompt.
                st.warning("Please enter a question.")
            else:
                relevant_content = get_relevant_content(
                    user_query, st.session_state.pdf_texts, st.session_state.embedder, st.session_state.embeddings
                )
                model = load_model()
                response = model(f"Question: {user_query} Context: {relevant_content}", max_length=200)
                st.success(response[0]['generated_text'])

# Launch the UI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()