Spaces:

svnmurali1
/

RagProject

Sleeping

App Files Files Community

RagProject / src /streamlit_app.py

svnmurali1

added the rag streamlit application

400006d verified 3 months ago

raw

history blame contribute delete

4.87 kB

	import os
	import streamlit as st
	import time
	from langchain_community.document_loaders import UnstructuredURLLoader
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.vectorstores import Chroma

	from langchain_core.prompts import ChatPromptTemplate
	from langchain_core.runnables import RunnablePassthrough, RunnableLambda
	from langchain_core.output_parsers import StrOutputParser

	from langchain_groq import ChatGroq

	# ---------------------------------
	# LLM
	# ---------------------------------
	# The Groq API key is read from the environment rather than embedded in the
	# source, so it never ends up in version control.
	# NOTE(review): the key that was previously hard-coded here is exposed in
	# the repository history and should be revoked.
	_groq_api_key = os.environ.get("GROQ_API_KEY")
	if not _groq_api_key:
	    raise RuntimeError(
	        "GROQ_API_KEY is not set. Define it as an environment variable "
	        "(or a Space secret) before starting the app."
	    )

	# Chat model shared by every question asked in the app.
	llm = ChatGroq(
	    model="llama-3.1-8b-instant",
	    temperature=0,
	    api_key=_groq_api_key,
	    max_tokens=100,  # upper bound on the length of each generated answer
	)


	# ---------------------------------
	# Prompt
	# ---------------------------------
	# System turn: restricts the model to the supplied context and gives it a
	# fixed fallback sentence for when the context lacks the answer.
	_system_text = (
	    "You are a helpful AI assistant.\n"
	    "Answer ONLY using the context provided.\n"
	    "If the context does not contain the answer, say "
	    "'I don't have enough information.'"
	)

	# Human turn: filled in through the {context} and {question} variables.
	_human_text = "Context:\n{context}\n\nQuestion:\n{question}"

	rag_prompt = ChatPromptTemplate.from_messages(
	    [("system", _system_text), ("human", _human_text)]
	)

	# ---------------------------------
	# Streamlit config
	# ---------------------------------
	st.set_page_config(
	    layout="wide",
	    page_title="RAG URL Chat",
	)
	st.title("🧠 RAG Chatbot with URLs")

	# ---------------------------------
	# Session state
	# ---------------------------------
	# Seed the retriever slot once so later code can compare it against None
	# before any URLs have been processed.
	if "retriever" not in st.session_state:
	    st.session_state["retriever"] = None

	# ---------------------------------
	# Sidebar
	# ---------------------------------
	with st.sidebar:
	    st.header("🔗 Input URLs")

	    # Free-form box; the processing step treats each non-blank line as a URL.
	    urls_text = st.text_area(
	        label="Enter URLs (one per line)",
	        placeholder="https://example.com\nhttps://another.com",
	        height=200,
	    )

	    # Click result is consumed by the "Process URLs" section of the script.
	    process_btn = st.button("🚀 Process URLs")

	# ---------------------------------
	# Process URLs
	# ---------------------------------
	# On click: download the listed pages, split them into overlapping chunks,
	# embed the chunks into a fresh Chroma collection and keep a retriever for
	# that collection in session state.
	if process_btn:
	    urls = [line.strip() for line in urls_text.splitlines() if line.strip()]

	    if not urls:
	        st.sidebar.warning("Please enter at least one URL")
	    else:
	        # Drop the previous index first so a failed run cannot leave a stale
	        # retriever behind.
	        st.session_state.retriever = None
	        st.session_state.vectorstore = None

	        # st.spinner cannot be called as st.sidebar.spinner(...); it has to
	        # be used inside a `with st.sidebar:` block to render in the sidebar.
	        with st.sidebar, st.spinner("Processing URLs..."):
	            headers = {
	                "User-Agent": "Mozilla/5.0 (compatible; RAGBot/1.0; +https://example.com)"
	            }
	            loader = UnstructuredURLLoader(urls=urls, headers=headers)
	            docs = loader.load()

	            splitter = RecursiveCharacterTextSplitter(
	                chunk_size=1000,
	                chunk_overlap=200,
	            )
	            splits = splitter.split_documents(docs)

	            # Only build an index when there is something to index; an empty
	            # chunk list means no page yielded any text.
	            if splits:
	                embeddings = HuggingFaceEmbeddings(
	                    model_name="sentence-transformers/all-MiniLM-L6-v2"
	                )

	                # Time-stamped collection name so every run gets its own
	                # collection instead of appending to an earlier one.
	                vectorstore = Chroma.from_documents(
	                    splits,
	                    embeddings,
	                    collection_name=f"rag-{time.time()}",
	                )

	                st.session_state.vectorstore = vectorstore
	                st.session_state.retriever = vectorstore.as_retriever(
	                    search_kwargs={"k": 4}  # chunks returned per question
	                )

	        if st.session_state.retriever is None:
	            st.sidebar.warning(
	                "No text could be extracted from the provided URLs"
	            )
	        else:
	            st.sidebar.success("✅ URLs processed successfully!")

	# ---------------------------------
	# Main UI
	# ---------------------------------
	st.subheader("💬 Ask a Question")

	# Both widgets are attached to the same form; the typed question is kept in
	# the box after submitting (clear_on_submit=False).
	_chat_form = st.form(key="chat_form", clear_on_submit=False)

	question = _chat_form.text_input(
	    label="Enter your question",
	    placeholder="Ask something from the provided URLs...",
	)

	ask_btn = _chat_form.form_submit_button("Ask")

	# ---------------------------------
	# Answer + Sources
	# ---------------------------------
	# On submit: validate the inputs, retrieve the matching chunks, ask the LLM
	# to answer from those chunks, then show the answer and where it came from.
	if ask_btn:
	    retriever = st.session_state.retriever

	    if retriever is None:
	        st.warning("Please process URLs first")
	    elif not question.strip():
	        st.warning("Please enter a question")
	    else:
	        with st.spinner("🤖 Generating answer..."):
	            time.sleep(0.3)  # ensures spinner renders

	            # Retrieve once and reuse the same documents for both the prompt
	            # context and the source list shown below.
	            docs = retriever.invoke(question)

	            # Hand the model the chunk text only, not the Document objects.
	            context = "\n\n".join(doc.page_content for doc in docs)

	            rag_chain = rag_prompt | llm | StrOutputParser()
	            answer = rag_chain.invoke(
	                {"context": context, "question": question}
	            )

	        # Answer
	        st.markdown("### ✅ Answer")
	        st.write(answer)

	        # Sources: several chunks can come from the same URL, so list each
	        # source once, in the order it was first retrieved.
	        st.markdown("### 📚 Sources")
	        sources = dict.fromkeys(
	            doc.metadata.get("source", "Unknown source") for doc in docs
	        )
	        for position, source in enumerate(sources, start=1):
	            st.write(f"{position}. {source}")