Spaces:
Sleeping
Sleeping
File size: 3,260 Bytes
f7d91c8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | # ===============================
# 📦 Install dependencies (Only for Colab)
# ===============================
# !pip install streamlit faiss-cpu PyPDF2 sentence-transformers
# ===============================
# 📚 Imports
# ===============================
import os
import numpy as np
import PyPDF2
import streamlit as st
from sentence_transformers import SentenceTransformer
import faiss
# ✅ Set Streamlit page configuration at the top
# Browser-tab title, favicon and layout for the app.
# NOTE(review): the two emoji below were reconstructed from mis-encoded bytes
# (the title literal had been split across two lines by a stray line-break
# byte); confirm them against the original file.
st.set_page_config(
    page_title="📅 Exam Schedule Chatbot",
    page_icon="🤖",
    layout="wide",
)
# ===============================
# 🎨 Styling
# ===============================
# Custom CSS for the title, subtitle, text input and file uploader.
# It is injected as raw HTML, which is why unsafe_allow_html is enabled.
_page_css = """
<style>
.main-title {
font-size: 40px;
font-weight: 800;
color: #1f77b4;
text-align: center;
}
.sub-title {
font-size: 20px;
color: #555;
text-align: center;
}
.stTextInput > div > input {
font-size: 16px;
height: 3em;
}
.stFileUploader {
margin-bottom: 20px;
}
</style>
"""
st.markdown(_page_css, unsafe_allow_html=True)
# ===============================
# 🧠 Load PDF & Extract Text
# ===============================
def extract_text_from_pdf(pdf_file):
    """Return the extractable text of every page in a PDF.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source; in this
            script it is the object returned by ``st.file_uploader``.

    Returns:
        The page texts joined with a newline between pages. Pages for which
        ``extract_text()`` returns nothing are skipped, so the result is an
        empty string when no page yields text.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    page_texts = (page.extract_text() for page in reader.pages)
    # Join with "\n" rather than concatenating directly: the caller splits the
    # result on newlines, and without a separator the last line of one page
    # would fuse with the first line of the next.
    return "\n".join(content for content in page_texts if content)
# ===============================
# 🧩 Embed Text and Create FAISS Index
# ===============================
def embed_and_index(text, model):
    """Split text into lines, embed each line and build a FAISS L2 index.

    Args:
        text: Raw text; every non-blank line becomes one searchable document.
        model: Object exposing ``encode(list_of_str, convert_to_tensor=False)``
            that returns one embedding per input string (a
            ``SentenceTransformer`` in this script).

    Returns:
        A ``(docs, index)`` tuple: ``docs`` is the list of stripped, non-empty
        lines and ``index`` is a ``faiss.IndexFlatL2`` holding their
        embeddings in the same order.

    Raises:
        ValueError: If ``text`` contains no non-blank lines, since there is
            nothing to embed and no vector to take the dimension from.
    """
    stripped_lines = (line.strip() for line in text.split("\n"))
    docs = [line for line in stripped_lines if line]
    if not docs:
        # Fail before running the model; previously this surfaced later as an
        # opaque IndexError on ``vectors[0]``.
        raise ValueError("No non-empty lines found in text; nothing to index.")

    # FAISS expects a contiguous float32 matrix of shape (n_docs, dim).
    vectors = np.ascontiguousarray(
        model.encode(docs, convert_to_tensor=False), dtype=np.float32
    )
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return docs, index
# ===============================
# 🤖 Query with Context
# ===============================
def query_with_context(question, docs, index, model, k=3):
    """Return the indexed lines closest to a question as a Markdown string.

    Args:
        question: The user's question.
        docs: The lines that were indexed, in the order they were added.
        index: Search index exposing ``search(vectors, k=...)`` that returns
            ``(distances, ids)`` (a FAISS index in this script).
        model: Object exposing ``encode(list_of_str, convert_to_tensor=False)``.
        k: Maximum number of lines to return. Defaults to 3.

    Returns:
        A Markdown string: a "Relevant Information" header followed by the
        matching lines, one per line, best match first. The list is empty
        when ``docs`` is empty or ``k`` is not positive.
    """
    # Never ask for more neighbours than there are documents: FAISS pads the
    # missing slots with id -1, and docs[-1] would silently return the last
    # line as a bogus hit.
    top_k = min(k, len(docs))
    if top_k <= 0:
        hits = []
    else:
        question_vec = np.ascontiguousarray(
            model.encode([question], convert_to_tensor=False), dtype=np.float32
        )
        _, neighbor_ids = index.search(question_vec, k=top_k)
        # Keep only ids that point at a real document (drops any -1 padding).
        hits = [docs[i] for i in neighbor_ids[0] if 0 <= i < len(docs)]

    context = "\n".join(hits)
    # NOTE(review): the leading "π" looks like a mis-encoded emoji; the
    # original glyph cannot be recovered from here, so it is left unchanged.
    return f"π **Relevant Information:**\n\n{context}"
# ===============================
# 💬 UI
# ===============================
# Page header, upload widget and question box.
# NOTE(review): the emoji in the UI strings below were reconstructed from
# mis-encoded bytes (the title literal had been split across two lines);
# confirm them against the original file.
st.markdown(
    '<div class="main-title">📅 University Exam Schedule Chatbot</div>',
    unsafe_allow_html=True,
)
st.markdown(
    '<div class="sub-title">Ask questions like "When is the AI exam?" or "Date of Software Engineering paper?"</div>',
    unsafe_allow_html=True,
)
uploaded_file = st.file_uploader("📤 Upload your Date Sheet PDF", type="pdf")
query = st.text_input("❓ Ask something about your exam schedule:")

if uploaded_file and query:
    # Key the cached index on the uploaded file. Checking only whether "docs"
    # exists would keep answering from the first PDF after the user uploads a
    # different one.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("indexed_file") != file_key:
        text = extract_text_from_pdf(uploaded_file)
        if not text.strip():
            # Scanned or image-only PDFs yield no text; stop with a message
            # instead of crashing while building an empty index.
            st.error("No extractable text was found in the uploaded PDF.")
            st.stop()
        # Load the embedding model once per session and reuse it for any
        # later uploads.
        if "embedding_model" not in st.session_state:
            st.session_state.embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
        st.session_state.docs, st.session_state.index = embed_and_index(
            text, st.session_state.embedding_model
        )
        st.session_state.indexed_file = file_key

    answer = query_with_context(
        query,
        st.session_state.docs,
        st.session_state.index,
        st.session_state.embedding_model,
    )
    st.markdown("### 💬 Answer:")
    st.success(answer)
|