Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,11 +16,11 @@ logging.basicConfig(
|
|
| 16 |
format='%(asctime)s - %(levelname)s - %(message)s'
|
| 17 |
)
|
| 18 |
|
| 19 |
-
MODEL_NAME = "google/[…old value truncated in page capture — presumably a LongT5 checkpoint, per the removed comment "LongT5 uses plain text prompt like T5"]
|
| 20 |
-
MAX_INPUT_LEN = […old value truncated in page capture]
|
| 21 |
|
| 22 |
st.set_page_config(
|
| 23 |
-
page_title="RAG · […old title suffix truncated in page capture]",
|
| 24 |
page_icon="🕸️",
|
| 25 |
layout="wide",
|
| 26 |
initial_sidebar_state="collapsed"
|
|
@@ -303,8 +303,8 @@ def scrape_website(url):
|
|
| 303 |
@st.cache_resource
|
| 304 |
def create_vector_store(text):
|
| 305 |
try:
|
| 306 |
-
# […old comment text truncated in page capture]
|
| 307 |
-
splitter = RecursiveCharacterTextSplitter(chunk_size=[…old chunk settings truncated in page capture])
|
| 308 |
docs = [Document(page_content=c) for c in splitter.split_text(text)]
|
| 309 |
emb = HuggingFaceEmbeddings(
|
| 310 |
model_name="sentence-transformers/all-MiniLM-L6-v2",
|
|
@@ -324,11 +324,10 @@ def answer_question(question):
|
|
| 324 |
if tokenizer is None:
|
| 325 |
return "Model failed to load. Check logs."
|
| 326 |
try:
|
| 327 |
-
# […old comment text truncated in page capture]
|
| 328 |
-
docs = st.session_state.vector_store.similarity_search(question, k=[…old k value truncated in page capture])
|
| 329 |
-
context = […old context-joining expression truncated in page capture]
|
| 330 |
|
| 331 |
-
# LongT5 uses plain text prompt like T5 — no chat template needed
|
| 332 |
prompt = (
|
| 333 |
"Answer the question using only the context provided. "
|
| 334 |
"If the answer is not in the context, say \"I don't know\".\n\n"
|
|
@@ -341,13 +340,13 @@ def answer_question(question):
|
|
| 341 |
prompt,
|
| 342 |
return_tensors="pt",
|
| 343 |
truncation=True,
|
| 344 |
-
max_length=MAX_INPUT_LEN,
|
| 345 |
)
|
| 346 |
|
| 347 |
with torch.no_grad():
|
| 348 |
outputs = model.generate(
|
| 349 |
**inputs,
|
| 350 |
-
max_new_tokens=[…old value truncated in page capture],
|
| 351 |
num_beams=4,
|
| 352 |
early_stopping=True,
|
| 353 |
no_repeat_ngram_size=3,
|
|
@@ -369,7 +368,7 @@ with st.sidebar:
|
|
| 369 |
st.markdown("**Model**")
|
| 370 |
st.markdown(f"`{MODEL_NAME}`")
|
| 371 |
st.markdown("**Context window**")
|
| 372 |
-
st.markdown("`[…old context-window text truncated in page capture]`")
|
| 373 |
st.markdown("**Architecture**")
|
| 374 |
st.markdown("`Encoder-Decoder`")
|
| 375 |
st.markdown("**Status**")
|
|
@@ -390,7 +389,7 @@ st.markdown(f"""
|
|
| 390 |
</div>
|
| 391 |
<div class="model-badge">
|
| 392 |
<div class="model-dot" style="background:{dot_color};"></div>
|
| 393 |
-
{dot_label} · […old model label truncated in page capture — presumably a LongT5 name]
|
| 394 |
</div>
|
| 395 |
</div>
|
| 396 |
""", unsafe_allow_html=True)
|
|
@@ -460,7 +459,7 @@ if st.session_state.scraped_content:
|
|
| 460 |
with st.chat_message("user"):
|
| 461 |
st.markdown(prompt)
|
| 462 |
with st.chat_message("assistant"):
|
| 463 |
-
with st.spinner("[…old spinner text truncated in page capture]"):
|
| 464 |
answer = answer_question(prompt)
|
| 465 |
st.markdown(answer)
|
| 466 |
st.session_state.chat_history.append({"role": "assistant", "content": answer})
|
|
|
|
| 16 |
format='%(asctime)s - %(levelname)s - %(message)s'
|
| 17 |
)
|
| 18 |
|
| 19 |
+
MODEL_NAME = "google/flan-t5-large"
|
| 20 |
+
MAX_INPUT_LEN = 512 # FLAN-T5-large context window
|
| 21 |
|
| 22 |
st.set_page_config(
|
| 23 |
+
page_title="RAG · FLAN-T5",
|
| 24 |
page_icon="🕸️",
|
| 25 |
layout="wide",
|
| 26 |
initial_sidebar_state="collapsed"
|
|
|
|
| 303 |
@st.cache_resource
|
| 304 |
def create_vector_store(text):
|
| 305 |
try:
|
| 306 |
+
# Small chunks so the single best one fits cleanly in 512 tokens
|
| 307 |
+
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=30)
|
| 308 |
docs = [Document(page_content=c) for c in splitter.split_text(text)]
|
| 309 |
emb = HuggingFaceEmbeddings(
|
| 310 |
model_name="sentence-transformers/all-MiniLM-L6-v2",
|
|
|
|
| 324 |
if tokenizer is None:
|
| 325 |
return "Model failed to load. Check logs."
|
| 326 |
try:
|
| 327 |
+
# k=1 — single most relevant chunk keeps prompt tight within 512 tokens
|
| 328 |
+
docs = st.session_state.vector_store.similarity_search(question, k=1)
|
| 329 |
+
context = docs[0].page_content
|
| 330 |
|
|
|
|
| 331 |
prompt = (
|
| 332 |
"Answer the question using only the context provided. "
|
| 333 |
"If the answer is not in the context, say \"I don't know\".\n\n"
|
|
|
|
| 340 |
prompt,
|
| 341 |
return_tensors="pt",
|
| 342 |
truncation=True,
|
| 343 |
+
max_length=MAX_INPUT_LEN,
|
| 344 |
)
|
| 345 |
|
| 346 |
with torch.no_grad():
|
| 347 |
outputs = model.generate(
|
| 348 |
**inputs,
|
| 349 |
+
max_new_tokens=200,
|
| 350 |
num_beams=4,
|
| 351 |
early_stopping=True,
|
| 352 |
no_repeat_ngram_size=3,
|
|
|
|
| 368 |
st.markdown("**Model**")
|
| 369 |
st.markdown(f"`{MODEL_NAME}`")
|
| 370 |
st.markdown("**Context window**")
|
| 371 |
+
st.markdown("`512 tokens`")
|
| 372 |
st.markdown("**Architecture**")
|
| 373 |
st.markdown("`Encoder-Decoder`")
|
| 374 |
st.markdown("**Status**")
|
|
|
|
| 389 |
</div>
|
| 390 |
<div class="model-badge">
|
| 391 |
<div class="model-dot" style="background:{dot_color};"></div>
|
| 392 |
+
{dot_label} · FLAN-T5-large
|
| 393 |
</div>
|
| 394 |
</div>
|
| 395 |
""", unsafe_allow_html=True)
|
|
|
|
| 459 |
with st.chat_message("user"):
|
| 460 |
st.markdown(prompt)
|
| 461 |
with st.chat_message("assistant"):
|
| 462 |
+
with st.spinner("FLAN-T5 is thinking…"):
|
| 463 |
answer = answer_question(prompt)
|
| 464 |
st.markdown(answer)
|
| 465 |
st.session_state.chat_history.append({"role": "assistant", "content": answer})
|