Spaces:

rizwankhan2123
/

Semantic-Search-Engine

Running

App Files Files Community

Semantic-Search-Engine / app.py

rizwankhan2123

fix problems

eba760d verified 4 days ago

raw

history blame contribute delete

5.38 kB

	import streamlit as st
	import chromadb
	from sentence_transformers import SentenceTransformer
	import uuid

	# ==========================================
	# PAGE CONFIG
	# ==========================================

	# Browser-tab title/icon and the wide page layout for the whole app.
	st.set_page_config(page_title="Semantic Search Engine", page_icon="🔍", layout="wide")

	# ==========================================
	# CUSTOM CSS
	# ==========================================

	# Style overrides for the page padding and the .result-box class. The text is
	# handed to st.markdown with unsafe_allow_html=True so the <style> element is
	# emitted as HTML rather than shown as text.
	custom_css = """
	<style>

	.main {
	padding-top: 1rem;
	}

	.block-container {
	padding-top: 2rem;
	}

	.result-box {
	padding: 1rem;
	border-radius: 12px;
	border: 1px solid #333;
	margin-bottom: 10px;
	}

	</style>
	"""
	st.markdown(custom_css, unsafe_allow_html=True)

	# ==========================================
	# TITLE
	# ==========================================

	# Page heading followed by a one-line description of what the app does.
	st.title("🔍 Semantic Search Engine")
	st.caption("Search documents using semantic similarity powered by Hugging Face embeddings.")

	# ==========================================
	# LOAD MODEL
	# ==========================================

	@st.cache_resource
	def load_model():
	    """Return the sentence-embedding model used for documents and queries.

	    The function is decorated with ``st.cache_resource`` so the model object
	    is built once and reused instead of being re-created on every script run.
	    """
	    model_name = "sentence-transformers/all-MiniLM-L6-v2"
	    return SentenceTransformer(model_name)


	model = load_model()

	# ==========================================
	# CHROMADB
	# ==========================================

	# Open the on-disk Chroma store at ./chroma_db, then fetch the "documents"
	# collection, creating it if it does not exist yet.
	client = chromadb.PersistentClient(path="./chroma_db")
	collection = client.get_or_create_collection(name="documents")

	# ==========================================
	# SIDEBAR
	# ==========================================

	# Sidebar widgets, written with st.sidebar object notation.
	st.sidebar.header("⚙️ Settings")

	# Number of matches a search should return: 1 to 10, starting at 5.
	top_k = st.sidebar.slider("Number of Results", min_value=1, max_value=10, value=5)

	st.sidebar.markdown("---")
	st.sidebar.info("Semantic Search compares meanings instead of matching exact keywords.")

	# ==========================================
	# DATABASE STATS
	# ==========================================

	st.markdown("## 📊 Database Statistics")

	# Two side-by-side metrics: how many documents the collection currently
	# holds, and the short name of the embedding model.
	col1, col2 = st.columns(2)
	col1.metric("Documents Stored", collection.count())
	col2.metric("Embedding Model", "MiniLM-L6-v2")

	# ==========================================
	# DOCUMENT INPUT
	# ==========================================

	st.markdown("---")
	st.markdown("## 📥 Add Documents")

	# Free-text input: every non-blank line becomes one stored document.
	documents = st.text_area(
	    "Enter documents (one document per line)",
	    height=220,
	    placeholder="""
	Python is a programming language.
	FastAPI is used to build APIs.
	Machine learning learns patterns from data.
	ChromaDB stores embeddings.
	""",
	)

	if st.button("💾 Store Documents"):
	    # Split on newlines, trim each line, and drop the ones that end up empty.
	    docs = [line.strip() for line in documents.split("\n") if line.strip()]

	    if not docs:
	        st.warning("Please enter at least one document.")
	    else:
	        with st.spinner("Generating embeddings..."):
	            # One embedding per document, converted to plain lists for Chroma.
	            embeddings = model.encode(docs).tolist()

	            # Each document gets a fresh random UUID as its id.
	            collection.add(
	                ids=[str(uuid.uuid4()) for _ in docs],
	                documents=docs,
	                embeddings=embeddings,
	            )

	        # st.rerun() stops this run immediately, so a message drawn here would
	        # vanish before the user could read it. Park it in session state and
	        # render it on the next run instead.
	        st.session_state["store_notice"] = (
	            f"{len(docs)} document(s) stored successfully."
	        )

	        # Rerun so widgets drawn earlier in the script (presumably the
	        # document-count metric) pick up the newly added rows.
	        st.rerun()

	# Show, exactly once, the confirmation left behind by a store + rerun.
	if "store_notice" in st.session_state:
	    st.success(st.session_state["store_notice"])
	    del st.session_state["store_notice"]

	# ==========================================
	# SEARCH SECTION
	# ==========================================

	st.markdown("---")
	st.markdown("## 🔎 Search")

	query = st.text_input("Enter your search query", placeholder="How can I build an API?")

	if st.button("🚀 Search", use_container_width=True):
	    # Read the collection size once; it guards against an empty store and
	    # also caps how many neighbours are requested below.
	    stored_count = collection.count()

	    if stored_count == 0:
	        st.error("No documents available. Add documents first.")
	    elif not query.strip():
	        st.warning("Please enter a search query.")
	    else:
	        with st.spinner("Searching similar documents..."):
	            # Embed the query with the same model used for the documents.
	            query_embedding = model.encode(query).tolist()

	            results = collection.query(
	                query_embeddings=[query_embedding],
	                # Never ask for more results than there are stored documents.
	                n_results=min(top_k, stored_count),
	            )

	        # A single query embedding was sent, so take the first entry of each
	        # per-query result list.
	        docs = results["documents"][0]
	        distances = results["distances"][0]

	        st.markdown("---")
	        st.markdown("## 📄 Search Results")

	        for rank, (doc, distance) in enumerate(zip(docs, distances), start=1):
	            # Bucket the raw distance into a label (smaller distance ranks higher).
	            # NOTE(review): the 0.7 / 1.2 cut-offs presumably assume the
	            # collection's default distance metric - confirm if that changes.
	            if distance < 0.7:
	                relevance = "🟢 Highly Relevant"
	            elif distance < 1.2:
	                relevance = "🟡 Relevant"
	            else:
	                relevance = "🔴 Weak Match"

	            # Plain "|" separator: "\|" is not a valid Python escape sequence.
	            with st.expander(f"#{rank} | {relevance}"):
	                st.write(doc)
	                st.caption(f"Distance Score: {distance:.4f}")

	# ==========================================
	# FOOTER
	# ==========================================

	# Closing horizontal rule drawn under the last section of the page.
	st.markdown("---")