# Hugging Face Space: svnmurali1/RagProject (status: Sleeping)
# File size: 4,866 Bytes

import os
import streamlit as st
import time
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

from langchain_groq import ChatGroq

# ---------------------------------
# LLM
# ---------------------------------

# The Groq API key is read from the environment instead of being embedded in
# the source file. On Hugging Face Spaces, define it as a secret named
# GROQ_API_KEY; locally, export it before launching the app.
# NOTE(review): a key was previously committed in this file; it must be
# treated as compromised and rotated in the Groq console.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "configure it before starting the app."
    )

# Chat model used to answer questions: deterministic output (temperature 0)
# and replies capped at 100 tokens.
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0,
    api_key=_groq_api_key,
    max_tokens=100,
)


# ---------------------------------
# Prompt
# ---------------------------------
# System instructions: restrict the model to the supplied context and give it
# a fixed fallback sentence for questions the context cannot answer.
_SYSTEM_INSTRUCTIONS = (
    "You are a helpful AI assistant.\n"
    "Answer ONLY using the context provided.\n"
    "If the context does not contain the answer, say "
    "'I don't have enough information.'"
)

# Human turn: filled with the `context` and `question` variables at call time.
_HUMAN_TEMPLATE = "Context:\n{context}\n\nQuestion:\n{question}"

# Two-message chat prompt (system + human) used by the answer chain.
rag_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _SYSTEM_INSTRUCTIONS),
        ("human", _HUMAN_TEMPLATE),
    ]
)

# ---------------------------------
# Streamlit config
# ---------------------------------
# Page-level settings (browser tab title, wide layout) followed by the
# on-page heading.
st.set_page_config(layout="wide", page_title="RAG URL Chat")
st.title("🧠 RAG Chatbot with URLs")

# ---------------------------------
# Session state
# ---------------------------------
# Make sure the `retriever` slot exists; it holds None until URLs have been
# processed, and an existing value is left untouched.
st.session_state.setdefault("retriever", None)

# ---------------------------------
# Sidebar
# ---------------------------------
# Sidebar widgets: a heading, a multi-line box for the URLs (one per line),
# and the button that triggers processing.
with st.sidebar:
    st.header("🔗 Input URLs")

    # Raw text of the URL box; it is split into individual URLs when the
    # button is pressed.
    urls_text = st.text_area(
        "Enter URLs (one per line)",
        placeholder="https://example.com\nhttps://another.com",
        height=200,
    )

    # True only on the rerun triggered by clicking the button.
    process_btn = st.button("🚀 Process URLs")

# ---------------------------------
# Process URLs
# ---------------------------------
# Build a fresh vector index from the URLs typed in the sidebar and store the
# resulting retriever in session state for the question form to use.
if process_btn:
    # One URL per line; blank lines and surrounding whitespace are ignored.
    urls = [line.strip() for line in urls_text.splitlines() if line.strip()]

    if not urls:
        st.sidebar.warning("Please enter at least one URL")
    else:
        # Drop any previous index first, so a failed run cannot leave a stale
        # retriever answering questions about the old URLs.
        st.session_state.retriever = None
        st.session_state.vectorstore = None

        # `st.spinner` is a module-level context manager, not an element
        # method, so `st.sidebar.spinner(...)` is not a valid call. Entering
        # the sidebar container first shows the spinner in the sidebar.
        with st.sidebar:
            with st.spinner("Processing URLs..."):
                headers = {
                    "User-Agent": "Mozilla/5.0 (compatible; RAGBot/1.0; +https://example.com)"
                }
                loader = UnstructuredURLLoader(urls=urls, headers=headers)
                docs = loader.load()

                # 1000-character chunks with 200 characters of overlap.
                splitter = RecursiveCharacterTextSplitter(
                    chunk_size=1000,
                    chunk_overlap=200,
                )
                splits = splitter.split_documents(docs)

                # Only build the index when something was actually loaded;
                # an empty chunk list cannot be indexed.
                if splits:
                    embeddings = HuggingFaceEmbeddings(
                        model_name="sentence-transformers/all-MiniLM-L6-v2"
                    )

                    # A per-run collection name keeps runs from sharing a
                    # collection; the integer timestamp limits the name to
                    # letters, digits and hyphens.
                    vectorstore = Chroma.from_documents(
                        splits,
                        embeddings,
                        collection_name=f"rag-{time.time_ns()}",
                    )

                    st.session_state.vectorstore = vectorstore
                    # Return the 4 most similar chunks per query.
                    st.session_state.retriever = vectorstore.as_retriever(
                        search_kwargs={"k": 4}
                    )

        # Report the real outcome instead of always claiming success.
        if st.session_state.retriever is None:
            st.sidebar.error("No content could be loaded from the provided URLs")
        else:
            st.sidebar.success("✅ URLs processed successfully!")

# ---------------------------------
# Main UI
# ---------------------------------
st.subheader("💬 Ask a Question")

# Question form: the text box and its submit button live in one form, so the
# script only reruns with the new question when "Ask" is pressed.
chat_form = st.form("chat_form", clear_on_submit=False)

# Text of the user's question; kept after submit (clear_on_submit=False).
question = chat_form.text_input(
    "Enter your question",
    placeholder="Ask something from the provided URLs...",
)

# True only on the rerun triggered by submitting the form.
ask_btn = chat_form.form_submit_button("Ask")

# ---------------------------------
# Answer + Sources
# ---------------------------------
# Answer the submitted question from the indexed URLs and list the sources
# of the chunks that were used.
if ask_btn:
    if st.session_state.retriever is None:
        st.warning("Please process URLs first")
    elif not question.strip():
        st.warning("Please enter a question")
    else:
        retriever = st.session_state.retriever

        with st.spinner("🤖 Generating answer..."):
            # Retrieve once and reuse the result for both the prompt and the
            # source list, so the sources shown are exactly the chunks the
            # answer was generated from.
            docs = retriever.invoke(question)

            # Give the model the plain chunk text rather than the repr of
            # the Document objects.
            context = "\n\n".join(doc.page_content for doc in docs)

            answer_chain = rag_prompt | llm | StrOutputParser()
            answer = answer_chain.invoke(
                {"context": context, "question": question}
            )

        # Answer
        st.markdown("### ✅ Answer")
        st.write(answer)

        # Sources: several chunks usually come from the same URL, so list
        # each source once, in retrieval order.
        st.markdown("### 📚 Sources")
        sources = dict.fromkeys(
            doc.metadata.get("source", "Unknown source") for doc in docs
        )
        for i, source in enumerate(sources, start=1):
            st.write(f"{i}. {source}")