Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,11 +16,11 @@ logging.basicConfig(
|
|
| 16 |
format='%(asctime)s - %(levelname)s - %(message)s'
|
| 17 |
)
|
| 18 |
|
| 19 |
-
MODEL_NAME = "google/[…old value truncated in page capture — presumably a LongT5 checkpoint, per the removed comment "LongT5 uses plain text prompt like T5"]
|
| 20 |
-
MAX_INPUT_LEN = […old value truncated in page capture]
|
| 21 |
|
| 22 |
st.set_page_config(
|
| 23 |
-
page_title="RAG · […old title suffix truncated in page capture]",
|
| 24 |
page_icon="🕸️",
|
| 25 |
layout="wide",
|
| 26 |
initial_sidebar_state="collapsed"
|
|
@@ -303,8 +303,8 @@ def scrape_website(url):
|
|
| 303 |
@st.cache_resource
|
| 304 |
def create_vector_store(text):
|
| 305 |
try:
|
| 306 |
-
# […old comment text truncated in page capture]
|
| 307 |
-
splitter = RecursiveCharacterTextSplitter(chunk_size=[…old chunk settings truncated in page capture])
|
| 308 |
docs = [Document(page_content=c) for c in splitter.split_text(text)]
|
| 309 |
emb = HuggingFaceEmbeddings(
|
| 310 |
model_name="sentence-transformers/all-MiniLM-L6-v2",
|
|
@@ -324,11 +324,10 @@ def answer_question(question):
|
|
| 324 |
if tokenizer is None:
|
| 325 |
return "Model failed to load. Check logs."
|
| 326 |
try:
|
| 327 |
-
# […old comment text truncated in page capture]
|
| 328 |
-
docs = st.session_state.vector_store.similarity_search(question, k=[…old k value truncated in page capture])
|
| 329 |
-
context = […old context-joining expression truncated in page capture]
|
| 330 |
|
| 331 |
-
# LongT5 uses plain text prompt like T5 — no chat template needed
|
| 332 |
prompt = (
|
| 333 |
"Answer the question using only the context provided. "
|
| 334 |
"If the answer is not in the context, say \"I don't know\".\n\n"
|
|
@@ -341,13 +340,13 @@ def answer_question(question):
|
|
| 341 |
prompt,
|
| 342 |
return_tensors="pt",
|
| 343 |
truncation=True,
|
| 344 |
-
max_length=MAX_INPUT_LEN,
|
| 345 |
)
|
| 346 |
|
| 347 |
with torch.no_grad():
|
| 348 |
outputs = model.generate(
|
| 349 |
**inputs,
|
| 350 |
-
max_new_tokens=[…old value truncated in page capture],
|
| 351 |
num_beams=4,
|
| 352 |
early_stopping=True,
|
| 353 |
no_repeat_ngram_size=3,
|
|
@@ -369,7 +368,7 @@ with st.sidebar:
|
|
| 369 |
st.markdown("**Model**")
|
| 370 |
st.markdown(f"`{MODEL_NAME}`")
|
| 371 |
st.markdown("**Context window**")
|
| 372 |
-
st.markdown("`[…old context-window text truncated in page capture]`")
|
| 373 |
st.markdown("**Architecture**")
|
| 374 |
st.markdown("`Encoder-Decoder`")
|
| 375 |
st.markdown("**Status**")
|
|
@@ -390,7 +389,7 @@ st.markdown(f"""
|
|
| 390 |
</div>
|
| 391 |
<div class="model-badge">
|
| 392 |
<div class="model-dot" style="background:{dot_color};"></div>
|
| 393 |
-
{dot_label} · […old model label truncated in page capture — presumably a LongT5 name]
|
| 394 |
</div>
|
| 395 |
</div>
|
| 396 |
""", unsafe_allow_html=True)
|
|
@@ -460,7 +459,7 @@ if st.session_state.scraped_content:
|
|
| 460 |
with st.chat_message("user"):
|
| 461 |
st.markdown(prompt)
|
| 462 |
with st.chat_message("assistant"):
|
| 463 |
-
with st.spinner("[…old spinner text truncated in page capture]"):
|
| 464 |
answer = answer_question(prompt)
|
| 465 |
st.markdown(answer)
|
| 466 |
st.session_state.chat_history.append({"role": "assistant", "content": answer})
|
|
|
|
| 16 |
format='%(asctime)s - %(levelname)s - %(message)s'
|
| 17 |
)
|
| 18 |
|
| 19 |
+
MODEL_NAME = "google/flan-t5-large"
|
| 20 |
+
MAX_INPUT_LEN = 512 # FLAN-T5-large context window
|
| 21 |
|
| 22 |
st.set_page_config(
|
| 23 |
+
page_title="RAG · FLAN-T5",
|
| 24 |
page_icon="🕸️",
|
| 25 |
layout="wide",
|
| 26 |
initial_sidebar_state="collapsed"
|
|
|
|
| 303 |
@st.cache_resource
|
| 304 |
def create_vector_store(text):
|
| 305 |
try:
|
| 306 |
+
# Small chunks so the single best one fits cleanly in 512 tokens
|
| 307 |
+
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=30)
|
| 308 |
docs = [Document(page_content=c) for c in splitter.split_text(text)]
|
| 309 |
emb = HuggingFaceEmbeddings(
|
| 310 |
model_name="sentence-transformers/all-MiniLM-L6-v2",
|
|
|
|
| 324 |
if tokenizer is None:
|
| 325 |
return "Model failed to load. Check logs."
|
| 326 |
try:
|
| 327 |
+
# k=1 — single most relevant chunk keeps prompt tight within 512 tokens
|
| 328 |
+
docs = st.session_state.vector_store.similarity_search(question, k=1)
|
| 329 |
+
context = docs[0].page_content
|
| 330 |
|
|
|
|
| 331 |
prompt = (
|
| 332 |
"Answer the question using only the context provided. "
|
| 333 |
"If the answer is not in the context, say \"I don't know\".\n\n"
|
|
|
|
| 340 |
prompt,
|
| 341 |
return_tensors="pt",
|
| 342 |
truncation=True,
|
| 343 |
+
max_length=MAX_INPUT_LEN,
|
| 344 |
)
|
| 345 |
|
| 346 |
with torch.no_grad():
|
| 347 |
outputs = model.generate(
|
| 348 |
**inputs,
|
| 349 |
+
max_new_tokens=200,
|
| 350 |
num_beams=4,
|
| 351 |
early_stopping=True,
|
| 352 |
no_repeat_ngram_size=3,
|
|
|
|
| 368 |
st.markdown("**Model**")
|
| 369 |
st.markdown(f"`{MODEL_NAME}`")
|
| 370 |
st.markdown("**Context window**")
|
| 371 |
+
st.markdown("`512 tokens`")
|
| 372 |
st.markdown("**Architecture**")
|
| 373 |
st.markdown("`Encoder-Decoder`")
|
| 374 |
st.markdown("**Status**")
|
|
|
|
| 389 |
</div>
|
| 390 |
<div class="model-badge">
|
| 391 |
<div class="model-dot" style="background:{dot_color};"></div>
|
| 392 |
+
{dot_label} · FLAN-T5-large
|
| 393 |
</div>
|
| 394 |
</div>
|
| 395 |
""", unsafe_allow_html=True)
|
|
|
|
| 459 |
with st.chat_message("user"):
|
| 460 |
st.markdown(prompt)
|
| 461 |
with st.chat_message("assistant"):
|
| 462 |
+
with st.spinner("FLAN-T5 is thinking…"):
|
| 463 |
answer = answer_question(prompt)
|
| 464 |
st.markdown(answer)
|
| 465 |
st.session_state.chat_history.append({"role": "assistant", "content": answer})
|