kinely committed on
Commit
f4947f0
·
verified ·
1 Parent(s): 4595fcd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ from PyPDF2 import PdfReader
4
+ from sentence_transformers import SentenceTransformer, util
5
+ import os
6
+
7
# Build the Hugging Face generation pipeline (FLAN-T5 base checkpoint).
@st.cache_resource
def load_model():
    """Return a ``text2text-generation`` pipeline for ``google/flan-t5-base``.

    Decorated with ``st.cache_resource`` so the pipeline object is cached by
    Streamlit instead of being rebuilt on every call.
    """
    generator = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
    )
    return generator
11
+
12
# Extract text from the PDF files in the dataset folder
def extract_text_from_pdfs(folder_path):
    """Return the extracted text of every PDF file directly inside a folder.

    Args:
        folder_path: Directory to scan. Sub-directories are not searched.

    Returns:
        A list with one string per PDF file, in sorted filename order. Each
        string is the text of the file's pages joined with single spaces.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder_path`` cannot
            be listed (for example ``FileNotFoundError``).
    """
    pdf_texts = []
    # os.listdir returns entries in arbitrary order; sorting keeps the index
    # of each document stable between runs.
    for filename in sorted(os.listdir(folder_path)):
        # Compare the extension case-insensitively so "REPORT.PDF" is kept.
        if not filename.lower().endswith(".pdf"):
            continue
        pdf_path = os.path.join(folder_path, filename)
        # Skip directories that merely happen to be named "*.pdf".
        if not os.path.isfile(pdf_path):
            continue
        reader = PdfReader(pdf_path)
        # Defensive: a page that yields no text object is treated as empty
        # text so that str.join never receives a non-string.
        text = " ".join(page.extract_text() or "" for page in reader.pages)
        pdf_texts.append(text)
    return pdf_texts
22
+
23
# Encode the PDF texts into vectors used for similarity matching.
@st.cache_resource
def create_embeddings(texts):
    """Encode ``texts`` with the ``all-MiniLM-L6-v2`` sentence-transformer.

    Args:
        texts: The documents to embed; passed unchanged to
            ``SentenceTransformer.encode``.

    Returns:
        A ``(embedder, embeddings)`` tuple: the model instance that was
        created here and the encoding of ``texts`` (requested as a tensor
        via ``convert_to_tensor=True``).
    """
    model = SentenceTransformer('all-MiniLM-L6-v2')
    return model, model.encode(texts, convert_to_tensor=True)
29
+
30
# Query matching function
def get_relevant_content(query, texts, embedder, embeddings):
    """Return the entry of ``texts`` whose embedding best matches ``query``.

    Args:
        query: The user's question.
        texts: Candidate documents; ``embeddings`` is expected to hold one
            embedding per entry, in the same order.
        embedder: Object whose ``encode(query, convert_to_tensor=True)``
            produces the query embedding.
        embeddings: Pre-computed embeddings of ``texts``.

    Returns:
        The best-matching text, or ``""`` when ``texts`` is empty.
    """
    # With nothing to search, argmax over an empty score tensor would raise;
    # report "no context" instead.
    if not texts:
        return ""
    query_embedding = embedder.encode(query, convert_to_tensor=True)
    scores = util.pytorch_cos_sim(query_embedding, embeddings)
    # argmax runs over the flattened score matrix; with a single query that
    # flat position is the index of the best-scoring text.
    best_idx = scores.argmax().item()
    return texts[best_idx]
36
+
37
# Streamlit UI
def main():
    """Render the chatbot page.

    The page has three parts: a sidebar that loads and embeds the PDF
    dataset into ``st.session_state``, a question box that answers from the
    best-matching PDF text, and a student-profile form whose submission is
    only displayed back to the user (no email is actually sent).
    """
    st.title("Educational Assistant Chatbot")
    st.markdown("Welcome! Ask me about academic programs, visa requirements, and more!")

    # Sidebar for Admin Options
    st.sidebar.header("Admin Options")
    # The folder entered here is the one that gets loaded. The default is
    # "chatbot", the folder this page previously always read from, so the
    # out-of-the-box behaviour is unchanged.
    dataset_folder = st.sidebar.text_input("Dataset Folder Path", "chatbot")
    email_to_send = st.sidebar.text_input("Email for Profile Submission", "application@aspireec.pk")

    # Load model and dataset
    st.sidebar.markdown("### Data Loading")
    if st.sidebar.button("Load Dataset"):
        try:
            pdf_texts = extract_text_from_pdfs(dataset_folder)
        except OSError as err:
            # Missing or unreadable folder: report it instead of crashing.
            st.sidebar.error(f"Could not read dataset folder '{dataset_folder}': {err}")
        else:
            if not pdf_texts:
                # Nothing to embed; any previously loaded dataset is kept.
                st.sidebar.warning(f"No PDF files found in '{dataset_folder}'.")
            else:
                st.session_state.pdf_texts = pdf_texts
                st.session_state.embedder, st.session_state.embeddings = create_embeddings(pdf_texts)
                st.sidebar.success("Dataset Loaded Successfully!")

    # Chat interface
    user_query = st.text_input("Your Query:")
    if st.button("Ask"):
        required_state = ("pdf_texts", "embedder", "embeddings")
        if any(key not in st.session_state for key in required_state):
            st.error("Dataset is not loaded. Please load the dataset in the sidebar.")
        elif not user_query.strip():
            # Do not run retrieval and generation for a blank question.
            st.warning("Please enter a question before pressing Ask.")
        else:
            relevant_content = get_relevant_content(
                user_query,
                st.session_state.pdf_texts,
                st.session_state.embedder,
                st.session_state.embeddings,
            )
            model = load_model()
            response = model(f"Question: {user_query} Context: {relevant_content}", max_length=200)
            st.success(response[0]['generated_text'])

    # Profile creation
    st.markdown("### Create a Student Profile")
    name = st.text_input("Name:")
    email = st.text_input("Email:")
    contact_number = st.text_input("Contact Number (Optional):")
    study_level = st.selectbox("Level of Study", ["Undergraduate", "Postgraduate", "PhD"])
    field_of_interest = st.text_input("Field of Interest:")
    career_goal = st.text_area("Career Goals:")
    visa_query = st.text_area("Visa Concerns or Questions:")

    if st.button("Submit Profile"):
        profile = f"""
        Name: {name}
        Email: {email}
        Contact Number: {contact_number}
        Level of Study: {study_level}
        Field of Interest: {field_of_interest}
        Career Goals: {career_goal}
        Visa Queries: {visa_query}
        """
        # Simulate email sending (for simplicity, just display the profile)
        st.markdown("### Profile Summary")
        st.code(profile)
        st.success(f"Profile submitted to {email_to_send}!")


# Run the UI when this file is executed as a script.
if __name__ == "__main__":
    main()