File size: 4,866 Bytes
400006d
5e7371d
400006d
 
 
 
 
5e7371d
400006d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import os
import streamlit as st
import time
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

from langchain_groq import ChatGroq

# ---------------------------------
# LLM
# ---------------------------------

# Shared chat model used to answer questions.
#
# The Groq API key is read from the GROQ_API_KEY environment variable so that
# no credential lives in source control. Any key that was previously committed
# to this file should be treated as leaked and revoked.
# temperature=0 keeps sampling as deterministic as the backend allows, and
# max_tokens=100 caps the length of each generated reply.
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0,
    api_key=os.environ.get("GROQ_API_KEY"),
    max_tokens=100,
)


# ---------------------------------
# Prompt
# ---------------------------------
# System instructions: restrict the model to the retrieved context and give it
# a fixed fallback sentence for questions the context cannot answer.
_SYSTEM_INSTRUCTIONS = "\n".join([
    "You are a helpful AI assistant.",
    "Answer ONLY using the context provided.",
    "If the context does not contain the answer, say 'I don't have enough information.'",
])

# Human turn: the `{context}` and `{question}` placeholders are filled in when
# the prompt is invoked.
_HUMAN_TEMPLATE = "Context:\n{context}\n\nQuestion:\n{question}"

rag_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _SYSTEM_INSTRUCTIONS),
        ("human", _HUMAN_TEMPLATE),
    ]
)

# ---------------------------------
# Streamlit config
# ---------------------------------
_PAGE_OPTIONS = {"page_title": "RAG URL Chat", "layout": "wide"}
st.set_page_config(**_PAGE_OPTIONS)
st.title("🧠 RAG Chatbot with URLs")

# ---------------------------------
# Session state
# ---------------------------------
# Seed each session key once; values already present survive script reruns.
for _state_key, _initial_value in (("retriever", None),):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# ---------------------------------
# Sidebar
# ---------------------------------
# Sidebar widgets: a free-form text area for the URLs to index and a button
# that triggers indexing. The emoji literals below replace mis-decoded
# (mojibake) byte sequences that were being shown to the user verbatim.
st.sidebar.header("🔗 Input URLs")

urls_text = st.sidebar.text_area(
    "Enter URLs (one per line)",
    height=200,
    placeholder="https://example.com\nhttps://another.com",
)

# True only on the script run triggered by clicking the button.
process_btn = st.sidebar.button("🚀 Process URLs")

# ---------------------------------
# Process URLs
# ---------------------------------
# When the sidebar button is clicked: download the listed URLs, split them
# into overlapping chunks, embed the chunks into a fresh Chroma collection and
# store a top-4 retriever in the session state for the question form to use.
if process_btn:
    if not urls_text.strip():
        st.sidebar.warning("Please enter at least one URL")
    else:
        # Drop any previous index first so a failed run never leaves a stale
        # retriever behind.
        st.session_state.retriever = None
        st.session_state.vectorstore = None

        urls = [line.strip() for line in urls_text.splitlines() if line.strip()]
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; RAGBot/1.0; +https://example.com)"
        }

        # `st.spinner` is not supported through object notation on the
        # sidebar (`st.sidebar.spinner`); it has to be used inside a
        # `with st.sidebar:` block instead.
        with st.sidebar:
            with st.spinner("Processing URLs..."):
                loader = UnstructuredURLLoader(urls=urls, headers=headers)
                docs = loader.load()

                splitter = RecursiveCharacterTextSplitter(
                    chunk_size=1000,
                    chunk_overlap=200,
                )
                splits = splitter.split_documents(docs)

                # Only build an index when there is something to embed;
                # an empty chunk list cannot be turned into a collection.
                if splits:
                    embeddings = HuggingFaceEmbeddings(
                        model_name="sentence-transformers/all-MiniLM-L6-v2"
                    )

                    # A timestamped collection name keeps each processing run
                    # in its own collection.
                    vectorstore = Chroma.from_documents(
                        splits,
                        embeddings,
                        collection_name=f"rag-{time.time()}",
                    )

                    st.session_state.vectorstore = vectorstore
                    st.session_state.retriever = vectorstore.as_retriever(
                        search_kwargs={"k": 4}
                    )

        if st.session_state.retriever is None:
            st.sidebar.error(
                "No content could be loaded from the provided URLs. "
                "Please check them and try again."
            )
        else:
            st.sidebar.success("✅ URLs processed successfully!")

# ---------------------------------
# Main UI
# ---------------------------------
# Question form. Wrapping the input in a form means the script only reacts to
# the question when "Ask" is pressed; clear_on_submit=False keeps the typed
# question visible after submission. The emoji literal replaces a mis-decoded
# (mojibake) byte sequence that was being shown to the user verbatim.
st.subheader("💬 Ask a Question")

with st.form("chat_form", clear_on_submit=False):

    question = st.text_input(
        "Enter your question",
        placeholder="Ask something from the provided URLs...",
    )

    ask_btn = st.form_submit_button("Ask")

# ---------------------------------
# Answer + Sources
# ---------------------------------
# When the form is submitted: validate the inputs, retrieve the most relevant
# chunks for the question, ask the LLM to answer from them, then render the
# answer followed by the list of source URLs.
if ask_btn:
    retriever = st.session_state.retriever

    if retriever is None:
        st.warning("Please process URLs first")
    elif not question.strip():
        st.warning("Please enter a question")
    else:
        with st.spinner("🤖 Generating answer..."):
            # Retrieve once and reuse the documents for both the prompt
            # context and the source list, instead of querying the vector
            # store a second time.
            docs = retriever.invoke(question)

            # Pass the chunk text to the prompt rather than the Document
            # objects themselves, so the model sees plain content.
            context = "\n\n".join(doc.page_content for doc in docs)

            rag_chain = rag_prompt | llm | StrOutputParser()
            answer = rag_chain.invoke(
                {"context": context, "question": question}
            )

        # Answer
        st.markdown("### ✅ Answer")
        st.write(answer)

        # Sources: several chunks can come from the same URL, so list each
        # source once while preserving retrieval order.
        st.markdown("### 📚 Sources")
        sources = dict.fromkeys(
            doc.metadata.get("source", "Unknown source") for doc in docs
        )
        for position, source in enumerate(sources, start=1):
            st.write(f"{position}. {source}")