Spaces:
Sleeping
Sleeping
| import os | |
| import streamlit as st | |
| import time | |
| from langchain_community.document_loaders import UnstructuredURLLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import Chroma | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from langchain_core.runnables import RunnablePassthrough, RunnableLambda | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_groq import ChatGroq | |
# ---------------------------------
# LLM
# ---------------------------------
# The Groq API key is a secret and must never be committed to source control.
# It is read from the GROQ_API_KEY environment variable (e.g. a deployment
# secret). Any key that was previously hard-coded here should be revoked.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    # Fail fast with an actionable message instead of an opaque auth error
    # on the first question.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "configure it before starting the app."
    )

# Chat model used to generate answers; temperature 0 and a 100-token cap
# are the settings this script has always used.
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0,
    api_key=_groq_api_key,
    max_tokens=100,
)
# ---------------------------------
# Prompt
# ---------------------------------
# System instructions: answer strictly from the supplied context and admit
# when the context does not contain the answer.
_SYSTEM_MESSAGE = (
    "You are a helpful AI assistant.\n"
    "Answer ONLY using the context provided.\n"
    "If the context does not contain the answer, say "
    "'I don't have enough information.'"
)

# Human turn: the retrieved context followed by the user's question.
_HUMAN_MESSAGE = "Context:\n{context}\n\nQuestion:\n{question}"

# Chat prompt with two template variables: `context` and `question`.
rag_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _SYSTEM_MESSAGE),
        ("human", _HUMAN_MESSAGE),
    ]
)
# ---------------------------------
# Streamlit config
# ---------------------------------
# Page-level settings, then the visible page title.
st.set_page_config(page_title="RAG URL Chat", layout="wide")
st.title("π§ RAG Chatbot with URLs")

# ---------------------------------
# Session state
# ---------------------------------
# The retriever lives in session state so it survives Streamlit reruns;
# it stays None until URLs have been processed.
st.session_state.setdefault("retriever", None)
# ---------------------------------
# Sidebar
# ---------------------------------
# All URL input controls are placed in the sidebar.
sidebar = st.sidebar
sidebar.header("π Input URLs")

# Free-form text box: one URL per line.
urls_text = sidebar.text_area(
    "Enter URLs (one per line)",
    placeholder="https://example.com\nhttps://another.com",
    height=200,
)

# True only on the rerun triggered by clicking the button.
process_btn = sidebar.button("π Process URLs")
# ---------------------------------
# Process URLs
# ---------------------------------
# On button click: load the pages, split them into overlapping chunks, embed
# the chunks into a fresh Chroma collection, and keep the resulting retriever
# in session state for the question form to use.
if process_btn:
    # One URL per line; blank lines and surrounding whitespace are ignored.
    urls = [line.strip() for line in urls_text.splitlines() if line.strip()]

    if not urls:
        st.sidebar.warning("Please enter at least one URL")
    else:
        # Drop any previous index first so a failed run cannot leave a stale
        # retriever answering questions about the old URLs.
        st.session_state.retriever = None
        st.session_state.vectorstore = None

        # st.spinner is not available in object notation (st.sidebar.spinner),
        # so enter the sidebar container and then open the spinner.
        with st.sidebar, st.spinner("Processing URLs..."):
            headers = {
                "User-Agent": "Mozilla/5.0 (compatible; RAGBot/1.0; +https://example.com)"
            }
            loader = UnstructuredURLLoader(urls=urls, headers=headers)
            docs = loader.load()

            splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000,
                chunk_overlap=200,
            )
            splits = splitter.split_documents(docs)

            # Building a vector store from zero chunks fails, so only index
            # when at least one chunk was produced.
            if splits:
                embeddings = HuggingFaceEmbeddings(
                    model_name="sentence-transformers/all-MiniLM-L6-v2"
                )
                # Timestamped collection name keeps each run's index separate.
                vectorstore = Chroma.from_documents(
                    splits,
                    embeddings,
                    collection_name=f"rag-{time.time()}",
                )
                st.session_state.vectorstore = vectorstore
                st.session_state.retriever = vectorstore.as_retriever(
                    search_kwargs={"k": 4}
                )

        if st.session_state.retriever is None:
            st.sidebar.error(
                "No readable content could be loaded from the provided URLs"
            )
        else:
            st.sidebar.success("β URLs processed successfully!")
# ---------------------------------
# Main UI
# ---------------------------------
st.subheader("π¬ Ask a Question")

# The question box and its submit button share one form; the typed text is
# kept after submission (clear_on_submit=False).
chat_form = st.form("chat_form", clear_on_submit=False)
question = chat_form.text_input(
    "Enter your question",
    placeholder="Ask something from the provided URLs...",
)
ask_btn = chat_form.form_submit_button("Ask")
# ---------------------------------
# Answer + Sources
# ---------------------------------
# On form submit: validate the inputs, retrieve the most relevant chunks,
# ask the LLM to answer from them, then show the answer and its source URLs.
if ask_btn:
    retriever = st.session_state.retriever

    if retriever is None:
        st.warning("Please process URLs first")
    elif not question.strip():
        st.warning("Please enter a question")
    else:
        with st.spinner("π€ Generating answer..."):
            time.sleep(0.3)  # ensures spinner renders

            # Retrieve once and reuse the same documents for both the prompt
            # context and the source list.
            docs = retriever.invoke(question)

            # Pass the chunk text to the prompt rather than the repr of the
            # Document objects.
            context = "\n\n".join(doc.page_content for doc in docs)

            answer_chain = rag_prompt | llm | StrOutputParser()
            answer = answer_chain.invoke(
                {"context": context, "question": question}
            )

        # Answer
        st.markdown("### β Answer")
        st.write(answer)

        # Sources: several chunks can come from the same URL, so list each
        # source once, in retrieval order.
        st.markdown("### π Sources")
        sources = dict.fromkeys(
            doc.metadata.get("source", "Unknown source") for doc in docs
        )
        for i, source in enumerate(sources, start=1):
            st.write(f"{i}. {source}")