muddasser commited on
Commit
f929333
·
verified ·
1 Parent(s): 922f71a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -14
app.py CHANGED
@@ -16,11 +16,11 @@ logging.basicConfig(
16
  format='%(asctime)s - %(levelname)s - %(message)s'
17
  )
18
 
19
- MODEL_NAME = "google/long-t5-tglobal-large"
20
- MAX_INPUT_LEN = 16384 # LongT5's full context window
21
 
22
  st.set_page_config(
23
- page_title="RAG · LongT5",
24
  page_icon="🕸️",
25
  layout="wide",
26
  initial_sidebar_state="collapsed"
@@ -303,8 +303,8 @@ def scrape_website(url):
303
  @st.cache_resource
304
  def create_vector_store(text):
305
  try:
306
- # Larger chunks since LongT5 can handle much more context
307
- splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
308
  docs = [Document(page_content=c) for c in splitter.split_text(text)]
309
  emb = HuggingFaceEmbeddings(
310
  model_name="sentence-transformers/all-MiniLM-L6-v2",
@@ -324,11 +324,10 @@ def answer_question(question):
324
  if tokenizer is None:
325
  return "Model failed to load. Check logs."
326
  try:
327
- # Retrieve more chunks LongT5 can handle it
328
- docs = st.session_state.vector_store.similarity_search(question, k=6)
329
- context = " ".join(d.page_content for d in docs)
330
 
331
- # LongT5 uses plain text prompt like T5 — no chat template needed
332
  prompt = (
333
  "Answer the question using only the context provided. "
334
  "If the answer is not in the context, say \"I don't know\".\n\n"
@@ -341,13 +340,13 @@ def answer_question(question):
341
  prompt,
342
  return_tensors="pt",
343
  truncation=True,
344
- max_length=MAX_INPUT_LEN, # full 16,384 token window
345
  )
346
 
347
  with torch.no_grad():
348
  outputs = model.generate(
349
  **inputs,
350
- max_new_tokens=300,
351
  num_beams=4,
352
  early_stopping=True,
353
  no_repeat_ngram_size=3,
@@ -369,7 +368,7 @@ with st.sidebar:
369
  st.markdown("**Model**")
370
  st.markdown(f"`{MODEL_NAME}`")
371
  st.markdown("**Context window**")
372
- st.markdown("`16,384 tokens`")
373
  st.markdown("**Architecture**")
374
  st.markdown("`Encoder-Decoder`")
375
  st.markdown("**Status**")
@@ -390,7 +389,7 @@ st.markdown(f"""
390
  </div>
391
  <div class="model-badge">
392
  <div class="model-dot" style="background:{dot_color};"></div>
393
- {dot_label} &nbsp;·&nbsp; LongT5-16k
394
  </div>
395
  </div>
396
  """, unsafe_allow_html=True)
@@ -460,7 +459,7 @@ if st.session_state.scraped_content:
460
  with st.chat_message("user"):
461
  st.markdown(prompt)
462
  with st.chat_message("assistant"):
463
- with st.spinner("LongT5 is thinking…"):
464
  answer = answer_question(prompt)
465
  st.markdown(answer)
466
  st.session_state.chat_history.append({"role": "assistant", "content": answer})
 
16
  format='%(asctime)s - %(levelname)s - %(message)s'
17
  )
18
 
19
+ MODEL_NAME = "google/flan-t5-large"
20
+ MAX_INPUT_LEN = 512 # FLAN-T5-large context window
21
 
22
  st.set_page_config(
23
+ page_title="RAG · FLAN-T5",
24
  page_icon="🕸️",
25
  layout="wide",
26
  initial_sidebar_state="collapsed"
 
303
  @st.cache_resource
304
  def create_vector_store(text):
305
  try:
306
+ # Small chunks so the single best one fits cleanly in 512 tokens
307
+ splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=30)
308
  docs = [Document(page_content=c) for c in splitter.split_text(text)]
309
  emb = HuggingFaceEmbeddings(
310
  model_name="sentence-transformers/all-MiniLM-L6-v2",
 
324
  if tokenizer is None:
325
  return "Model failed to load. Check logs."
326
  try:
327
+ # k=1: retrieving only the single most relevant chunk keeps the prompt within the 512-token limit
328
+ docs = st.session_state.vector_store.similarity_search(question, k=1)
329
+ context = docs[0].page_content
330
 
 
331
  prompt = (
332
  "Answer the question using only the context provided. "
333
  "If the answer is not in the context, say \"I don't know\".\n\n"
 
340
  prompt,
341
  return_tensors="pt",
342
  truncation=True,
343
+ max_length=MAX_INPUT_LEN,
344
  )
345
 
346
  with torch.no_grad():
347
  outputs = model.generate(
348
  **inputs,
349
+ max_new_tokens=200,
350
  num_beams=4,
351
  early_stopping=True,
352
  no_repeat_ngram_size=3,
 
368
  st.markdown("**Model**")
369
  st.markdown(f"`{MODEL_NAME}`")
370
  st.markdown("**Context window**")
371
+ st.markdown("`512 tokens`")
372
  st.markdown("**Architecture**")
373
  st.markdown("`Encoder-Decoder`")
374
  st.markdown("**Status**")
 
389
  </div>
390
  <div class="model-badge">
391
  <div class="model-dot" style="background:{dot_color};"></div>
392
+ {dot_label} &nbsp;·&nbsp; FLAN-T5-large
393
  </div>
394
  </div>
395
  """, unsafe_allow_html=True)
 
459
  with st.chat_message("user"):
460
  st.markdown(prompt)
461
  with st.chat_message("assistant"):
462
+ with st.spinner("FLAN-T5 is thinking…"):
463
  answer = answer_question(prompt)
464
  st.markdown(answer)
465
  st.session_state.chat_history.append({"role": "assistant", "content": answer})