Spaces:
Sleeping
Sleeping
File size: 4,866 Bytes
400006d 5e7371d 400006d 5e7371d 400006d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 | import os
import streamlit as st
import time
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_groq import ChatGroq
# ---------------------------------
# LLM
# ---------------------------------
# The Groq API key is taken from the GROQ_API_KEY environment variable so
# that no credential lives in the source file.
# NOTE(review): a key was previously committed here in plain text; it should
# be treated as leaked and revoked.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set")

# Chat model used to generate answers: temperature 0, replies capped at
# 100 tokens.
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0,
    api_key=_groq_api_key,
    max_tokens=100,
)
# ---------------------------------
# Prompt
# ---------------------------------
# System message: tells the model to answer only from the supplied context
# and gives it a fixed reply for when the context is insufficient.
_SYSTEM_TEMPLATE = (
    "You are a helpful AI assistant.\n"
    "Answer ONLY using the context provided.\n"
    "If the context does not contain the answer, say "
    "'I don't have enough information.'"
)

# Human message: carries the two template variables, {context} and {question}.
_HUMAN_TEMPLATE = "Context:\n{context}\n\nQuestion:\n{question}"

rag_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _SYSTEM_TEMPLATE),
        ("human", _HUMAN_TEMPLATE),
    ]
)
# ---------------------------------
# Streamlit config
# ---------------------------------
st.set_page_config(page_title="RAG URL Chat", layout="wide")
# The emoji is spelled as a \N{...} escape so the source stays pure ASCII and
# the glyph cannot be corrupted by a wrong file encoding.
st.title("\N{BRAIN} RAG Chatbot with URLs")
# ---------------------------------
# Session state
# ---------------------------------
# Seed the retriever slot so later code can test it with `is None` before
# any URLs have been processed.
if "retriever" not in st.session_state:
    st.session_state.retriever = None
# ---------------------------------
# Sidebar
# ---------------------------------
# Sidebar widgets: a header, a multi-line box for the URLs, and the button
# that triggers processing.
# NOTE(review): the leading "π" in the header and button labels looks like a
# mis-decoded emoji; the intended glyph cannot be determined from this file,
# so the strings are kept as they appear -- confirm and restore.
with st.sidebar:
    st.header("π Input URLs")
    urls_text = st.text_area(
        "Enter URLs (one per line)",
        placeholder="https://example.com\nhttps://another.com",
        height=200,
    )
    process_btn = st.button("π Process URLs")
# ---------------------------------
# Process URLs
# ---------------------------------
# Runs when the sidebar button is clicked: load the pages, split them into
# overlapping chunks, embed the chunks into a fresh Chroma collection, and
# store a retriever over that collection in session state.
if process_btn:
    urls = [line.strip() for line in urls_text.splitlines() if line.strip()]
    # Status elements are issued inside `with st.sidebar:` because
    # st.spinner is not available through st.sidebar.<name> object notation.
    with st.sidebar:
        if not urls:
            st.warning("Please enter at least one URL")
        else:
            # Clear the previous index first so a failed run cannot leave a
            # stale retriever behind.
            st.session_state.retriever = None
            st.session_state.vectorstore = None
            with st.spinner("Processing URLs..."):
                loader = UnstructuredURLLoader(
                    urls=urls,
                    headers={
                        "User-Agent": "Mozilla/5.0 (compatible; RAGBot/1.0; +https://example.com)"
                    },
                )
                docs = loader.load()
                splitter = RecursiveCharacterTextSplitter(
                    chunk_size=1000,
                    chunk_overlap=200,
                )
                splits = splitter.split_documents(docs)
                # Only build the vector store when something was actually
                # loaded; an empty chunk list cannot be indexed.
                if splits:
                    embeddings = HuggingFaceEmbeddings(
                        model_name="sentence-transformers/all-MiniLM-L6-v2"
                    )
                    # Timestamped collection name keeps each run's index
                    # separate from earlier ones.
                    vectorstore = Chroma.from_documents(
                        splits,
                        embeddings,
                        collection_name=f"rag-{time.time()}",
                    )
                    st.session_state.retriever = vectorstore.as_retriever(
                        search_kwargs={"k": 4}
                    )
            if st.session_state.retriever is None:
                st.error("No readable content could be loaded from the given URLs")
            else:
                st.success("\N{WHITE HEAVY CHECK MARK} URLs processed successfully!")
# ---------------------------------
# Main UI
# ---------------------------------
# The emoji is spelled as a \N{...} escape so the source stays pure ASCII and
# the glyph cannot be corrupted by a wrong file encoding.
st.subheader("\N{SPEECH BALLOON} Ask a Question")
# The question input and its submit button live in one form; the typed text
# is kept after submission (clear_on_submit=False).
with st.form("chat_form", clear_on_submit=False):
    question = st.text_input(
        "Enter your question",
        placeholder="Ask something from the provided URLs...",
    )
    ask_btn = st.form_submit_button("Ask")
# ---------------------------------
# Answer + Sources
# ---------------------------------
# On submit: validate the inputs, retrieve the matching chunks once, answer
# from their text, then list the distinct source URLs.
if ask_btn:
    if st.session_state.retriever is None:
        st.warning("Please process URLs first")
    elif not question.strip():
        st.warning("Please enter a question")
    else:
        with st.spinner("\N{ROBOT FACE} Generating answer..."):
            time.sleep(0.3)  # ensures spinner renders
            retriever = st.session_state.retriever
            # Retrieve once and reuse the result for both the prompt context
            # and the source list.
            docs = retriever.invoke(question)
            # Pass only the chunk text to the prompt, not the repr of the
            # Document objects.
            context = "\n\n".join(doc.page_content for doc in docs)
            rag_chain = rag_prompt | llm | StrOutputParser()
            answer = rag_chain.invoke(
                {"context": context, "question": question}
            )
        # Answer
        st.markdown("### \N{WHITE HEAVY CHECK MARK} Answer")
        st.write(answer)
        # Sources
        # NOTE(review): the "π" below looks like a mis-decoded emoji; the
        # intended glyph cannot be determined from this file -- confirm.
        st.markdown("### π Sources")
        # Several chunks can come from the same page; dict.fromkeys keeps
        # the first occurrence of each source in retrieval order.
        sources = dict.fromkeys(
            doc.metadata.get("source", "Unknown source") for doc in docs
        )
        for i, source in enumerate(sources):
            st.write(f"{i+1}. {source}")
|