Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,269 +1,267 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
from
|
| 6 |
-
from
|
| 7 |
-
from
|
| 8 |
-
from langchain_community.
|
| 9 |
-
from
|
| 10 |
-
from
|
| 11 |
-
from langchain.chains
|
| 12 |
-
|
| 13 |
-
import
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
.
|
| 25 |
-
.
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
}
|
| 33 |
-
.
|
| 34 |
-
|
| 35 |
-
border
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
}
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
}
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
st.
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
"
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
"
|
| 102 |
-
"
|
| 103 |
-
"
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
response
|
| 108 |
-
response.
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
vector_store
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
("user", "
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
(
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
"
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
"
|
| 190 |
-
"
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
"
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
language
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
prompt
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
st.
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
st.
|
| 228 |
-
st.session_state.
|
| 229 |
-
st.session_state.
|
| 230 |
-
st.session_state.
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
st.session_state.
|
| 234 |
-
st.session_state.
|
| 235 |
-
st.session_state.
|
| 236 |
-
st.
|
| 237 |
-
st.
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
user_query
|
| 250 |
-
|
| 251 |
-
st.
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
st.
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
st.
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
st.markdown(f"**Summary / সামারি:** {summary}")
|
| 269 |
-
```
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
st.set_page_config(page_title="LazyAss Reader AI", layout="wide") # MUST be first
|
| 3 |
+
|
| 4 |
+
from langchain_core.messages import AIMessage, HumanMessage
|
| 5 |
+
from langchain_community.document_loaders import WebBaseLoader
|
| 6 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 7 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 8 |
+
from langchain_community.vectorstores import FAISS
|
| 9 |
+
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
| 10 |
+
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 11 |
+
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
|
| 12 |
+
import requests
|
| 13 |
+
import re
|
| 14 |
+
|
| 15 |
+
# --- Configuration ---
import os

# SECURITY: the API key is a secret and must not live in source control.
# It is read from the DEEPSEEK_API_KEY environment variable (configure it as a
# deployment secret). The key that used to be hard-coded here was published
# with the file and should be revoked.
# NOTE(review): the removed key had an "sk-or-v1-" prefix while the URL below
# points at api.deepseek.com — confirm key and endpoint belong to the same provider.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
# Name of the sentence-embedding model handed to HuggingFaceEmbeddings.
EMBEDDING_MODEL_NAME = "BAAI/bge-small-en-v1.5"
# Chat-completions endpoint that call_deepseek() posts to.
DEEPSEEK_API_URL = "https://api.deepseek.com/v1/chat/completions"
|
| 19 |
+
|
| 20 |
+
# --- Custom CSS for Professional UI ---
# Injects one raw HTML snippet into the page: a <style> block (page and sidebar
# backgrounds, button / text-input styling, the .chat-message, .human-message
# and .ai-message bubble classes, heading colours, .sidebar-text) followed by a
# <link> that loads the "Noto Sans Bengali" web font referenced by .sidebar-text.
# unsafe_allow_html=True is passed so the markup is not rendered as plain text.
st.markdown("""
<style>
.main { background-color: #f9fafb; }
.stSidebar { background-color: #ffffff; border-right: 1px solid #e5e7eb; }
.stButton>button {
background-color: #2563eb;
color: white;
border-radius: 8px;
padding: 10px 20px;
font-weight: 500;
}
.stButton>button:hover { background-color: #1d4ed8; }
.stTextInput>div>input {
border-radius: 8px;
border: 1px solid #d1d5db;
padding: 10px;
}
.chat-message {
padding: 15px;
border-radius: 8px;
margin-bottom: 10px;
}
.human-message {
background-color: #dbeafe;
margin-left: 20%;
text-align: right;
}
.ai-message {
background-color: #f3f4f6;
margin-right: 20%;
}
h1 { color: #1f2937; font-size: 2.5rem; }
h2 { color: #374151; }
.sidebar-text {
color: #4b5563;
font-size: 1rem;
margin-top: 20px;
font-family: 'Noto Sans Bengali', sans-serif;
}
.stRadio > label { margin-right: 20px; }
</style>
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans+Bengali:wght@400;700&display=swap" rel="stylesheet">
""", unsafe_allow_html=True)
|
| 64 |
+
|
| 65 |
+
# --- Initialization ---
|
| 66 |
+
@st.cache_resource
def get_embedding_model():
    """Instantiate the embedding model named by EMBEDDING_MODEL_NAME.

    The function is wrapped in ``st.cache_resource``. If construction raises,
    the error is shown with ``st.error`` and ``st.stop()`` is called.

    Returns:
        The ``HuggingFaceEmbeddings`` instance on success.
    """
    try:
        model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
    except Exception as exc:
        st.error(f"🔴 Failed to load embedding model '{EMBEDDING_MODEL_NAME}': {exc}")
        st.stop()
    else:
        return model
|
| 73 |
+
embed_model = get_embedding_model()

st.title("LazyAss Reader AI 💬")

# Initialize session state
# Each key is seeded only when it is absent, so values stored by an earlier
# run of the script in the same session are left alone. Factories keep the
# default from being built unless it is actually needed.
for _state_key, _make_default in (
    (
        "chat_history",
        lambda: [AIMessage(content="Hello! Please enter a website URL in the sidebar to begin. / হ্যালো! শুরু করতে সাইডবারে একটি ওয়েবসাইট URL লিখুন।")],
    ),
    ("vector_store", lambda: None),
    ("processed_url", lambda: None),
    ("documents", lambda: None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
|
| 86 |
+
|
| 87 |
+
# --- Core Functions ---
|
| 88 |
+
def detect_language(text):
    """Classify *text* as Bengali or English.

    Returns:
        "bn" if *text* contains at least one character in the range
        U+0980-U+09FF, otherwise "en" (this includes the empty string).
    """
    if re.search(r'[\u0980-\u09FF]', text) is None:
        return "en"
    return "bn"
|
| 92 |
+
|
| 93 |
+
def call_deepseek(prompt, language, timeout=60):
    """Send *prompt* as a single user message to the chat-completions endpoint.

    Args:
        prompt: Text placed in the one "user" message of the request.
        language: "bn" or "en"; only selects the language of the error text
            returned when the call fails.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can wait indefinitely on a stalled
            connection, which would freeze the Streamlit script.

    Returns:
        The stripped content of the first choice on success. On any failure
        a user-facing error string is returned instead of raising: a fixed
        Bengali message for "bn", otherwise "Error: <exception text>".
    """
    headers = {
        "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
        "Content-Type": "application/json"
    }
    # NOTE(review): confirm "deepseek-r1" is a model id the configured endpoint accepts.
    payload = {
        "model": "deepseek-r1",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 500,
        "temperature": 0.7
    }
    try:
        response = requests.post(
            DEEPSEEK_API_URL,
            json=payload,
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"].strip()
    except Exception as e:
        # This function is the error boundary for the remote call: callers
        # expect a string back, so every failure (network, HTTP status,
        # timeout, unexpected response shape) is turned into message text.
        return "একটি ত্রুটি ঘটেছে।" if language == "bn" else f"Error: {str(e)}"
|
| 111 |
+
|
| 112 |
+
def get_vector_store_from_url(url):
    """Load a web page, split it into chunks and index the chunks in FAISS.

    Args:
        url: Address of the page to load. ``None``, an empty string or a
            whitespace-only string is rejected without any network access.

    Returns:
        Always a 3-tuple ``(vector_store, doc_chunks, error_message)``.
        On success ``error_message`` is ``None``; on failure the first two
        items are ``None`` and ``error_message`` describes the problem.
        (Previously the early-exit paths returned 2-tuples, which made the
        caller's three-way unpacking raise ``ValueError``.)
    """
    if not url or not url.strip():
        return None, None, "URL is empty."
    try:
        loader = WebBaseLoader(url)
        documents = loader.load()
        if not documents:
            return None, None, "Could not load any content from the URL."

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        doc_chunks = text_splitter.split_documents(documents)
        if not doc_chunks:
            return None, None, "Loaded content but could not split it into chunks."

        vector_store = FAISS.from_documents(doc_chunks, embed_model)
        return vector_store, doc_chunks, None
    except Exception as e:
        # Loading, splitting and embedding can each fail; report the failure
        # as text so the UI can display it.
        return None, None, f"Failed to process URL '{url}': {str(e)}"
|
| 131 |
+
|
| 132 |
+
def get_context_retriever_chain(vector_store):
    """Build a history-aware retriever whose query rewriting uses call_deepseek.

    Args:
        vector_store: Object exposing ``as_retriever()``.

    Returns:
        The chain produced by ``create_history_aware_retriever``, or ``None``
        after showing an ``st.error`` if construction fails.
    """
    try:
        # Function-scope import: the file's import block does not bring in
        # RunnableLambda; langchain_core itself is already a dependency.
        from langchain_core.runnables import RunnableLambda

        retriever = vector_store.as_retriever()
        prompt = ChatPromptTemplate.from_messages([
            MessagesPlaceholder(variable_name='chat_history'),
            ("user", "{input}"),
            ("user", "Given the above conversation, generate a search query to look up relevant information from the website content.")
        ])

        def invoke_deepseek(prompt_value):
            # In the retriever chain this step sits between the prompt and a
            # string output parser: it receives the formatted prompt value
            # (not the raw input dict) and must hand back plain text.
            messages = prompt_value.to_messages()
            # The last message is the fixed English instruction above, so the
            # user's own turn is the one before it.
            language = detect_language(str(messages[-2].content))
            return call_deepseek(prompt_value.to_string(), language)

        return create_history_aware_retriever(RunnableLambda(invoke_deepseek), retriever, prompt)
    except Exception as e:
        st.error(f"🔴 Error creating retriever chain: {e}")
        return None
|
| 149 |
+
|
| 150 |
+
def get_conversational_rag_chain(retriever_chain):
    """Build the retrieval chain that answers questions via call_deepseek.

    Args:
        retriever_chain: Retriever step passed to ``create_retrieval_chain``.

    Returns:
        The chain produced by ``create_retrieval_chain``, or ``None`` after
        showing an ``st.error`` if construction fails.
    """
    try:
        # Function-scope import: the file's import block does not bring in
        # RunnableLambda; langchain_core itself is already a dependency.
        from langchain_core.runnables import RunnableLambda

        prompt = ChatPromptTemplate.from_messages([
            ("system", "You are an assistant for answering questions about a specific website. Use the provided context to answer in the user's language (Bengali or English). If the information is not in the context, state that you cannot answer based on the provided website content. Be concise and helpful.\n\nContext:\n{context}"),
            MessagesPlaceholder(variable_name='chat_history'),
            ('user', "{input}"),
        ])

        def invoke_deepseek(prompt_value):
            # In the stuff-documents chain this step sits between the prompt
            # and a string output parser: it receives the formatted prompt
            # value (not the raw input dict) and must hand back plain text.
            # create_retrieval_chain stores that text under "answer".
            messages = prompt_value.to_messages()
            # The final message of this prompt is the user's "{input}" turn.
            language = detect_language(str(messages[-1].content))
            return call_deepseek(prompt_value.to_string(), language)

        stuff_documents_chain = create_stuff_documents_chain(RunnableLambda(invoke_deepseek), prompt)
        return create_retrieval_chain(retriever_chain, stuff_documents_chain)
    except Exception as e:
        st.error(f"🔴 Error creating RAG chain: {e}")
        return None
|
| 167 |
+
|
| 168 |
+
def get_response(user_input):
    """Answer *user_input* from the loaded website via the RAG chain.

    Both chains are rebuilt from ``st.session_state.vector_store`` on every
    call, and ``st.session_state.chat_history`` is passed as conversation
    history.

    Args:
        user_input: The user's question.

    Returns:
        The model's answer, or a user-facing message when no website is
        loaded, a chain cannot be built, the answer is empty or looks like a
        refusal, or the chain raises.
    """
    if st.session_state.vector_store is None:
        return "⚠️ Please submit a valid website URL first using the sidebar."

    retriever_chain = get_context_retriever_chain(st.session_state.vector_store)
    if not retriever_chain:
        return "🔴 Error: Could not create the retriever chain."

    conversation_rag_chain = get_conversational_rag_chain(retriever_chain)
    if not conversation_rag_chain:
        return "🔴 Error: Could not create the conversation chain."

    language = detect_language(user_input)
    try:
        response = conversation_rag_chain.invoke({
            "chat_history": st.session_state.chat_history,
            "input": user_input
        })
        answer = response.get('answer', '').strip()
        refusals = [
            "I cannot answer based on the provided website content",
            "information is not in the context",
            "don't have information about that",
        ]
        # Compare case-insensitively on BOTH sides. The phrases used to be
        # matched verbatim against a lower-cased answer, so any phrase
        # containing a capital letter could never match.
        lowered_answer = answer.lower()
        is_refusal = not answer or any(
            refusal.lower() in lowered_answer for refusal in refusals
        )
        if not is_refusal:
            return answer
        return (
            "আমি ওয়েবসাইটের তথ্যের উপর ভিত্তি করে আপনার প্রশ্নের উত্তর দিতে পারিনি।" if language == "bn"
            else "I couldn't find relevant information on the website to answer your question."
        )
    except Exception as e:
        # Chain execution is the UI's error boundary: report instead of crashing.
        return "একটি ত্রুটি ঘটেছে।" if language == "bn" else f"🔴 Error processing your query: {str(e)}"
|
| 201 |
+
|
| 202 |
+
def summarize_content(language):
    """Ask the model for a short summary of the loaded website.

    Args:
        language: "bn" requests a Bengali summary; any other value requests
            an English one.

    Returns:
        The text returned by ``call_deepseek``, a warning string when
        ``st.session_state.documents`` is empty, or an error string if
        building or sending the prompt raises.
    """
    docs = st.session_state.documents
    if not docs:
        return "⚠️ No website content available to summarize."
    try:
        target = 'Bengali' if language == 'bn' else 'English'
        # Only the first 2000 characters of the joined page text are sent.
        joined = " ".join(doc.page_content for doc in docs)
        context = joined[:2000]
        prompt = f"Summarize the following website content in {target} in 100 words or less:\n\n{context}"
        return call_deepseek(prompt, language)
    except Exception as exc:
        if language == "bn":
            return "একটি ত্রুটি ঘটেছে।"
        return f"Error: {str(exc)}"
|
| 212 |
+
|
| 213 |
+
# --- Streamlit UI ---
|
| 214 |
+
# Sidebar: URL entry, website loading and a Bengali usage hint.
# NOTE(review): the nesting below is reconstructed — confirm that the load
# handling and the hint paragraph belong inside the sidebar.
with st.sidebar:
    st.header("Settings / সেটিংস")
    web_url = st.text_input("Enter Website URL / ওয়েবসাইট URL লিখুন", key="url_input")
    button_clicked = st.button("Load Website / ওয়েবসাইট লোড করুন", type="primary")

    if button_clicked:
        if not web_url:
            st.warning("Please enter a website URL. / অনুগ্রহ করে একটি ওয়েবসাইট URL লিখুন।")
        elif web_url != st.session_state.get("processed_url"):
            with st.spinner(f"Processing {web_url}..."):
                vector_store, documents, error_message = get_vector_store_from_url(web_url)
                if error_message:
                    # Loading failed: drop any previously loaded site and
                    # restart the conversation with a failure notice.
                    st.error(f"🔴 Failed: {error_message}")
                    st.session_state.vector_store = None
                    st.session_state.processed_url = None
                    st.session_state.documents = None
                    st.session_state.chat_history = [AIMessage(content="Failed to load the website. / ওয়েবসাইট লোড করতে ব্যর্থ।")]
                elif vector_store:
                    # Loading succeeded: store the index and chunks, remember
                    # the URL and restart the conversation for the new site.
                    st.session_state.vector_store = vector_store
                    st.session_state.documents = documents
                    st.session_state.processed_url = web_url
                    st.session_state.chat_history = [AIMessage(content=f"Website '{web_url}' loaded! How can I help you? / ওয়েবসাইট '{web_url}' লোড হয়েছে! আমি কীভাবে সাহায্য করতে পারি?")]
                    st.success("✅ Website loaded successfully! / ওয়েবসাইট সফলভাবে লোড হয়েছে!")
                    # NOTE(review): the rerun follows the success box
                    # immediately, so the box may never be visible — confirm.
                    st.rerun()
        else:
            st.info("This URL has already been loaded. / এই URL ইতিমধ্যে লোড করা হয়েছে।")

    # The word "চাইলে" had one character replaced by three U+FFFD replacement
    # characters (a lost 3-byte UTF-8 sequence); it is restored here.
    st.markdown('<p class="sidebar-text">আপনি চাইলে বাংলায় প্রশ্নও করতে পারেন যেকোনো ইংরেজি দূর্বোধ্য সাইটের লেখাজোকা বুঝতে</p>', unsafe_allow_html=True)
|
| 242 |
+
|
| 243 |
+
# Chat area: replay the stored conversation, then handle a new question.
import html

st.subheader("Chat History / চ্যাট ইতিহাস")
for message in st.session_state.chat_history:
    is_ai = isinstance(message, AIMessage)
    bubble_class = "ai-message" if is_ai else "human-message"
    with st.chat_message("AI" if is_ai else "Human"):
        # Message text comes from the user, the model and the loaded website.
        # It is placed inside raw HTML, so it is escaped first to keep markup
        # in the text from being interpreted by the browser.
        st.markdown(f'<div class="chat-message {bubble_class}">{html.escape(str(message.content))}</div>', unsafe_allow_html=True)

user_query = st.chat_input("Ask a question about the website... / ওয়েবসাইট সম্পর্কে একটি প্রশ্ন জিজ্ঞাসা করুন...")
if user_query:
    with st.chat_message("Human"):
        st.markdown(f'<div class="chat-message human-message">{html.escape(user_query)}</div>', unsafe_allow_html=True)

    with st.spinner("Thinking... / চিন্তা করছে..."):
        response_content = get_response(user_query)

    # Record both turns only after the answer is produced. get_response()
    # passes the stored history AND the new question to the chain, so
    # appending the question beforehand sent it to the model twice.
    st.session_state.chat_history.extend([
        HumanMessage(content=user_query),
        AIMessage(content=response_content),
    ])
    with st.chat_message("AI"):
        st.markdown(f'<div class="chat-message ai-message">{html.escape(str(response_content))}</div>', unsafe_allow_html=True)
|
| 260 |
+
|
| 261 |
+
# --- Summarize Section ---
# Lets the user pick a summary language and, on button press, shows the text
# returned by summarize_content() for that language.
st.subheader("Summarize Content / সামারি করুন")
summary_lang = st.radio(
    "Select summary language / সামারি ভাষা নির্বাচন করুন:",
    ["English", "বাংলা"],
    horizontal=True,
)
summarize_clicked = st.button("Summarize / সামারি করুন", type="primary")
if summarize_clicked:
    with st.spinner("Summarizing... / সামারি করছে..."):
        if summary_lang == "বাংলা":
            summary = summarize_content("bn")
        else:
            summary = summarize_content("en")
        st.markdown(f"**Summary / সামারি:** {summary}")
|
|
|
|
|
|