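# NOTE(review): the three lines below look like web-page residue from where
# this file was copied (not Python); kept as comments so the script parses.
# rizwankhan2123's picture
# fix problems
# eba760d verified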
import streamlit as st
import chromadb
from sentence_transformers import SentenceTransformer
import uuid
# ==========================================
# PAGE CONFIG
# ==========================================
# Browser-tab title, tab icon and wide layout. Kept ahead of every other
# Streamlit call in the script.
st.set_page_config(
    page_title="Semantic Search Engine",
    # Restored from mis-decoded UTF-8 bytes; the garbled text was not an emoji.
    page_icon="🔍",
    layout="wide",
)
# ==========================================
# CUSTOM CSS
# ==========================================
# Page-wide style overrides. The literal is passed through unchanged
# (including its leading and trailing newline) and rendered as raw HTML,
# which is why unsafe_allow_html is required.
# NOTE(review): `.result-box` is declared here but no markup in the portion
# of the file shown uses that class — confirm whether it is still needed.
_custom_css = """
<style>
.main {
padding-top: 1rem;
}
.block-container {
padding-top: 2rem;
}
.result-box {
padding: 1rem;
border-radius: 12px;
border: 1px solid #333;
margin-bottom: 10px;
}
</style>
"""
st.markdown(_custom_css, unsafe_allow_html=True)
# ==========================================
# TITLE
# ==========================================
# Page heading and one-line description. The emoji was restored from
# mis-decoded UTF-8 bytes.
st.title("🔍 Semantic Search Engine")
st.caption(
    "Search documents using semantic similarity powered by Hugging Face embeddings."
)
# ==========================================
# LOAD MODEL
# ==========================================
@st.cache_resource
def load_model():
    """Build the sentence-embedding model used for documents and queries.

    Wrapped in ``st.cache_resource`` so Streamlit caches the returned model
    object instead of constructing it again on every call.

    Returns:
        The ``SentenceTransformer`` loaded from the
        ``sentence-transformers/all-MiniLM-L6-v2`` checkpoint.
    """
    checkpoint = "sentence-transformers/all-MiniLM-L6-v2"
    return SentenceTransformer(checkpoint)


# Encoder shared by the "store documents" and "search" sections below.
model = load_model()
# ==========================================
# CHROMADB
# ==========================================
# On-disk Chroma store rooted at ./chroma_db (relative to the working
# directory), and the single "documents" collection the app reads and writes.
client = chromadb.PersistentClient(path="./chroma_db")
collection = client.get_or_create_collection(name="documents")
# ==========================================
# SIDEBAR
# ==========================================
# Sidebar: result-count control plus a short explanation of semantic search.
# The header emoji was restored from mis-decoded UTF-8 bytes.
with st.sidebar:
    st.header("⚙️ Settings")
    # Upper bound on the number of hits a search returns (1-10, default 5);
    # read by the search section as `top_k`.
    top_k = st.slider(
        "Number of Results",
        min_value=1,
        max_value=10,
        value=5,
    )
    st.markdown("---")
    st.info(
        "Semantic Search compares meanings instead of matching exact keywords."
    )
# ==========================================
# DATABASE STATS
# ==========================================
# Two side-by-side metrics: how many documents the collection currently holds
# and which embedding model is in use. The heading emoji was restored from
# mis-decoded UTF-8 bytes.
st.markdown("## 📊 Database Statistics")
col1, col2 = st.columns(2)
with col1:
    st.metric("Documents Stored", collection.count())
with col2:
    st.metric("Embedding Model", "MiniLM-L6-v2")
# ==========================================
# DOCUMENT INPUT
# ==========================================
# Document entry: a text area (one document per line) and a button that
# embeds the lines and adds them to the collection. Emoji in the heading and
# button label were restored from mis-decoded UTF-8 bytes.
st.markdown("---")
st.markdown("## 📥 Add Documents")
documents = st.text_area(
    "Enter documents (one document per line)",
    height=220,
    placeholder="""
Python is a programming language.
FastAPI is used to build APIs.
Machine learning learns patterns from data.
ChromaDB stores embeddings.
""",
)

if st.button("💾 Store Documents"):
    # One document per non-blank line, with surrounding whitespace removed.
    docs = [line.strip() for line in documents.split("\n") if line.strip()]
    if not docs:
        st.warning("Please enter at least one document.")
    else:
        with st.spinner("Generating embeddings..."):
            embeddings = model.encode(docs).tolist()
            collection.add(
                # Fresh random id per document; identical text submitted
                # twice is therefore stored twice.
                ids=[str(uuid.uuid4()) for _ in docs],
                documents=docs,
                embeddings=embeddings,
            )
        # st.rerun() stops this run immediately, so a message rendered right
        # before it would never be displayed. Park it in session state and
        # show it on the run that follows.
        st.session_state["stored_notice"] = (
            f"{len(docs)} document(s) stored successfully."
        )
        # Rerun so the document-count metric earlier on the page is refreshed.
        st.rerun()

# One-shot confirmation left behind by the previous run, if any.
stored_notice = st.session_state.pop("stored_notice", None)
if stored_notice:
    st.success(stored_notice)
# ==========================================
# SEARCH SECTION
# ==========================================
# Search: embed the query, fetch the nearest stored documents and list them
# with a coarse relevance label. Emoji in the headings, button and labels
# were restored from mis-decoded UTF-8 bytes.
st.markdown("---")
st.markdown("## 🔎 Search")
query = st.text_input(
    "Enter your search query",
    placeholder="How can I build an API?",
)

# Distance cut-offs for the label shown on each hit (smaller = closer).
# NOTE(review): presumably tuned for the collection's default distance
# metric — re-check these if the metric or the embedding model changes.
HIGHLY_RELEVANT_BELOW = 0.7
RELEVANT_BELOW = 1.2

if st.button("🚀 Search", use_container_width=True):
    # Read the count once: it gates the empty-database error and also caps
    # n_results so no more hits are requested than documents exist.
    stored_count = collection.count()
    if stored_count == 0:
        st.error("No documents available. Add documents first.")
    elif not query.strip():
        st.warning("Please enter a search query.")
    else:
        with st.spinner("Searching similar documents..."):
            query_embedding = model.encode(query).tolist()
            results = collection.query(
                query_embeddings=[query_embedding],
                n_results=min(top_k, stored_count),
            )
        # Results are grouped per query; only one query was sent, so take
        # the first (and only) group.
        docs = results["documents"][0]
        distances = results["distances"][0]
        st.markdown("---")
        st.markdown("## 📄 Search Results")
        for rank, (doc, distance) in enumerate(zip(docs, distances), start=1):
            if distance < HIGHLY_RELEVANT_BELOW:
                relevance = "🟢 Highly Relevant"
            elif distance < RELEVANT_BELOW:
                relevance = "🟡 Relevant"
            else:
                relevance = "🔴 Weak Match"
            with st.expander(f"#{rank} | {relevance}"):
                st.write(doc)
                st.caption(f"Distance Score: {distance:.4f}")
# ==========================================
# FOOTER
# ==========================================
# Closing horizontal rule; no footer content follows it in the portion of the
# file shown.
st.markdown("---")