Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from transformers import pipeline
|
| 3 |
+
from PyPDF2 import PdfReader
|
| 4 |
+
from sentence_transformers import SentenceTransformer, util
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
# Load Hugging Face model (e.g., FLAN-T5 or GPT-like model)
|
| 8 |
+
@st.cache_resource
def load_model():
    """Build the text-to-text generation pipeline used to answer queries.

    The pipeline is created for the ``google/flan-t5-base`` checkpoint.
    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the returned object instead of constructing it again on later calls.

    Returns:
        The ``transformers`` pipeline object for "text2text-generation".
    """
    generator = pipeline("text2text-generation", model="google/flan-t5-base")
    return generator
|
| 11 |
+
|
| 12 |
+
# Extract text from PDF files in the dataset folder
|
| 13 |
+
def extract_text_from_pdfs(folder_path):
    """Return the extracted text of every PDF located directly in a folder.

    Args:
        folder_path: Directory to scan (not searched recursively).

    Returns:
        A list with one string per PDF file; each string is the text of all
        pages of that file joined with single spaces. The list is empty when
        the folder holds no PDF files.

    Raises:
        OSError: Propagated from ``os.listdir`` when the folder is missing
            or cannot be read.
    """
    pdf_texts = []
    # os.listdir returns entries in arbitrary order; sorting keeps the order
    # of the returned documents stable between runs.
    for filename in sorted(os.listdir(folder_path)):
        # Compare case-insensitively so "REPORT.PDF" is picked up as well.
        if not filename.lower().endswith(".pdf"):
            continue
        reader = PdfReader(os.path.join(folder_path, filename))
        # Substitute "" for a falsy result (e.g. None for a page without a
        # text layer) so that str.join cannot raise TypeError.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        pdf_texts.append(" ".join(page_texts))
    return pdf_texts
|
| 22 |
+
|
| 23 |
+
# Embed PDF texts for similarity matching
|
| 24 |
+
@st.cache_resource
def create_embeddings(texts):
    """Encode the document texts for similarity matching.

    Args:
        texts: The document strings to embed.

    Returns:
        A ``(embedder, embeddings)`` tuple: the ``SentenceTransformer``
        instance loaded from 'all-MiniLM-L6-v2' and the result of encoding
        ``texts`` with ``convert_to_tensor=True``.
    """
    model = SentenceTransformer('all-MiniLM-L6-v2')
    vectors = model.encode(texts, convert_to_tensor=True)
    return model, vectors
|
| 29 |
+
|
| 30 |
+
# Query matching function
|
| 31 |
+
def get_relevant_content(query, texts, embedder, embeddings):
    """Return the entry of ``texts`` most similar to ``query``.

    Args:
        query: The user's question.
        texts: Candidate document strings.
        embedder: Object whose ``encode(query, convert_to_tensor=True)``
            produces the query embedding.
        embeddings: Pre-computed embeddings of ``texts``.
            Assumes one embedding per entry of ``texts``, in the same order
            (TODO confirm against the caller).

    Returns:
        The text with the highest cosine similarity to the query, or an
        empty string when ``texts`` is empty.
    """
    # Nothing to rank against: return early instead of failing on an empty
    # similarity matrix further down.
    if not texts:
        return ""
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    scores = util.pytorch_cos_sim(query_embedding, embeddings)
    # argmax() without a dim argument indexes into the flattened scores.
    best_idx = scores.argmax().item()
    return texts[best_idx]
|
| 36 |
+
|
| 37 |
+
# Streamlit UI
|
| 38 |
+
def main():
    """Render the Streamlit page.

    The page has three parts:

    * a sidebar where an admin picks the dataset folder and loads it into
      ``st.session_state`` (``pdf_texts``, ``embedder``, ``embeddings``);
    * a query box that retrieves the best-matching document and asks the
      generation model to answer from it;
    * a student-profile form whose submission is only displayed on the page
      (no email is actually sent).
    """
    st.title("Educational Assistant Chatbot")
    st.markdown("Welcome! Ask me about academic programs, visa requirements, and more!")

    # Sidebar for Admin Options
    st.sidebar.header("Admin Options")
    # The loader used to ignore this field and always read "chatbot". The
    # field is now honoured, and its default is set to "chatbot" so the
    # out-of-the-box behaviour stays the same.
    dataset_folder = st.sidebar.text_input("Dataset Folder Path", "chatbot")
    email_to_send = st.sidebar.text_input("Email for Profile Submission", "application@aspireec.pk")

    # Load model and dataset
    st.sidebar.markdown("### Data Loading")
    if st.sidebar.button("Load Dataset"):
        try:
            pdf_texts = extract_text_from_pdfs(dataset_folder)
        except OSError as exc:
            # Missing / unreadable folder: report it instead of crashing the page.
            st.sidebar.error(f"Could not read dataset folder '{dataset_folder}': {exc}")
        else:
            if not pdf_texts:
                # Nothing to embed; leave any previously loaded dataset in place.
                st.sidebar.warning(f"No PDF files found in '{dataset_folder}'.")
            else:
                st.session_state.pdf_texts = pdf_texts
                st.session_state.embedder, st.session_state.embeddings = create_embeddings(pdf_texts)
                st.sidebar.success("Dataset Loaded Successfully!")

    # Chat interface
    user_query = st.text_input("Your Query:")
    if st.button("Ask"):
        dataset_ready = all(
            key in st.session_state for key in ("pdf_texts", "embedder", "embeddings")
        )
        if not dataset_ready:
            st.error("Dataset is not loaded. Please load the dataset in the sidebar.")
        elif not user_query.strip():
            st.warning("Please enter a question first.")
        else:
            relevant_content = get_relevant_content(
                user_query,
                st.session_state.pdf_texts,
                st.session_state.embedder,
                st.session_state.embeddings,
            )
            model = load_model()
            prompt = f"Question: {user_query} Context: {relevant_content}"
            # The context is the full text of a PDF and can be very long.
            # truncation=True lets the tokenizer cut the prompt to the model's
            # maximum input length; the question comes first, so it is kept.
            response = model(prompt, max_length=200, truncation=True)
            st.success(response[0]['generated_text'])

    # Profile creation
    st.markdown("### Create a Student Profile")
    name = st.text_input("Name:")
    email = st.text_input("Email:")
    contact_number = st.text_input("Contact Number (Optional):")
    study_level = st.selectbox("Level of Study", ["Undergraduate", "Postgraduate", "PhD"])
    field_of_interest = st.text_input("Field of Interest:")
    career_goal = st.text_area("Career Goals:")
    visa_query = st.text_area("Visa Concerns or Questions:")

    if st.button("Submit Profile"):
        if not name.strip() or not email.strip():
            # Only the contact number is labelled optional in the form.
            st.error("Name and Email are required to submit a profile.")
        else:
            profile_lines = [
                f"Name: {name}",
                f"Email: {email}",
                f"Contact Number: {contact_number}",
                f"Level of Study: {study_level}",
                f"Field of Interest: {field_of_interest}",
                f"Career Goals: {career_goal}",
                f"Visa Queries: {visa_query}",
            ]
            profile = "\n" + "\n".join(profile_lines) + "\n"
            # Simulate email sending (for simplicity, just display the profile)
            st.markdown("### Profile Summary")
            st.code(profile)
            st.success(f"Profile submitted to {email_to_send}!")
|
| 91 |
+
|
| 92 |
+
# Script entry point: build the Streamlit UI when this file is executed directly.
if __name__ == "__main__":
    main()
|