Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,6 +19,18 @@ CHAT_MODEL_ID = "NousResearch/Meta-Llama-3-8B-Instruct"
|
|
| 19 |
EMB_MODEL_ID = "mixedbread-ai/mxbai-embed-large-v1"
|
| 20 |
MAX_PROMPT_TOKENS = 8192
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# --- lazy loaders (unchanged) -------------------------------------------------
|
| 23 |
tokenizer, chat_model = None, None
|
| 24 |
emb_tokenizer, emb_model = None, None
|
|
@@ -60,13 +72,13 @@ def embed(text:str)->torch.Tensor:
|
|
| 60 |
|
| 61 |
kb = defaultdict(lambda: {"texts": [], "vecs": None})
|
| 62 |
|
| 63 |
-
def add_docs(user_id: str,
|
| 64 |
-
"""Embed *docs* and append them to the KB for *user_id*.
|
| 65 |
-
Returns the number of docs actually stored."""
|
| 66 |
-
docs = [t for t in docs if t.strip()] # skip blanks
|
| 67 |
-
if not docs:
|
| 68 |
-
return 0
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
load_embedder() # lazy-load once
|
| 71 |
new_vecs = torch.stack([embed(t) for t in docs]).cpu()
|
| 72 |
store = kb[user_id] # auto-creates via defaultdict
|
|
@@ -117,10 +129,9 @@ def build_llm_prompt(system: str, context: list[str], user_question: str) -> str
|
|
| 117 |
return prompt
|
| 118 |
|
| 119 |
# ---------- 4. Gradio playground (same UI as before) --------------------------
|
| 120 |
-
def store_doc(doc_text: str,
|
| 121 |
-
"""UI callback: take the textbox content and shove it into the KB."""
|
| 122 |
try:
|
| 123 |
-
n = add_docs(user_id, [doc_text])
|
| 124 |
if n == 0:
|
| 125 |
return "Nothing stored (empty input)."
|
| 126 |
return f"Stored — KB now has {len(kb[user_id]['texts'])} passage(s)."
|
|
@@ -128,7 +139,11 @@ def store_doc(doc_text: str, user_id="demo"):
|
|
| 128 |
return f"Error during storing: {e}"
|
| 129 |
|
| 130 |
import traceback
|
| 131 |
-
def answer(system: str, context: str, question: str,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
"""UI callback: retrieve, build prompt with Qwen tags, generate answer."""
|
| 133 |
try:
|
| 134 |
if not question.strip():
|
|
@@ -175,6 +190,10 @@ def answer(system: str, context: str, question: str, user_id="demo", history="No
|
|
| 175 |
**tokens,
|
| 176 |
max_new_tokens=512,
|
| 177 |
max_length=MAX_PROMPT_TOKENS + 512,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
)
|
| 179 |
full = tokenizer.decode(output[0], skip_special_tokens=True)
|
| 180 |
reply = full.split("<|im_start|>assistant")[-1].strip()
|
|
@@ -203,7 +222,13 @@ with gr.Blocks() as demo:
|
|
| 203 |
with gr.Row():
|
| 204 |
passage_box = gr.Textbox(lines=6, label="Reference passage")
|
| 205 |
user_id_box = gr.Textbox(value="demo", label="User ID")
|
| 206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
clear_btn = gr.Button("Clear KB")
|
| 208 |
|
| 209 |
status_box = gr.Markdown()
|
|
@@ -216,16 +241,27 @@ with gr.Blocks() as demo:
|
|
| 216 |
|
| 217 |
# ---- Q & A ----
|
| 218 |
question_box = gr.Textbox(lines=2, label="Ask a question")
|
| 219 |
-
history_cb
|
| 220 |
-
system_box
|
| 221 |
-
context_box
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
answer_btn.click(
|
| 227 |
fn=answer,
|
| 228 |
-
inputs=[system_box, context_box, question_box,
|
|
|
|
|
|
|
| 229 |
outputs=answer_box
|
| 230 |
)
|
| 231 |
|
|
|
|
| 19 |
EMB_MODEL_ID = "mixedbread-ai/mxbai-embed-large-v1"
|
| 20 |
MAX_PROMPT_TOKENS = 8192
|
| 21 |
|
| 22 |
+
# ---------- new defaults & helper ------------------
# Token-sampling defaults consumed by the answer() callback.
DEFAULT_TEMP = 0.7
DEFAULT_TOP_P = 0.9
DEFAULT_TOP_K_TOK = 40  # token-level sampling

# Character-based chunking defaults consumed by add_docs()/chunk_text().
DEFAULT_CHUNK_SIZE = 512  # characters
DEFAULT_CHUNK_OVERLAP = 128
|
| 28 |
+
|
| 29 |
+
def chunk_text(text: str, size: int, overlap: int):
    """Yield sliding-window chunks of *text* with character overlap.

    Args:
        text: The string to split; an empty string yields no chunks.
        size: Maximum chunk length in characters; must exceed *overlap*.
        overlap: Number of characters shared between consecutive chunks.

    Returns:
        An iterator of string chunks; the final chunk may be shorter
        than *size*.

    Raises:
        ValueError: If ``overlap >= size``. The original code computed a
            ``range`` step of ``size - overlap``: a step of 0 raised an
            opaque ``range() arg 3 must not be zero`` and a negative step
            produced an *empty* range — silently dropping every document.
    """
    step = size - overlap
    # Validate eagerly so the error surfaces at call time, not on the
    # first next() of a lazily-consumed generator.
    if step <= 0:
        raise ValueError(
            f"chunk overlap ({overlap}) must be smaller than chunk size ({size})"
        )

    def _windows():
        for start in range(0, len(text), step):
            yield text[start : start + size]

    return _windows()
|
| 33 |
+
|
| 34 |
# --- lazy loaders (unchanged) -------------------------------------------------
|
| 35 |
tokenizer, chat_model = None, None
|
| 36 |
emb_tokenizer, emb_model = None, None
|
|
|
|
| 72 |
|
| 73 |
kb = defaultdict(lambda: {"texts": [], "vecs": None})
|
| 74 |
|
| 75 |
+
def add_docs(user_id: str,docs: list[str],chunk_size: int = DEFAULT_CHUNK_SIZE,chunk_overlap: int = DEFAULT_CHUNK_OVERLAP) -> int:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
+
# ---------- NEW ----------
|
| 78 |
+
chunks = []
|
| 79 |
+
for d in docs:
|
| 80 |
+
chunks.extend(chunk_text(d, chunk_size, chunk_overlap))
|
| 81 |
+
docs = [c for c in chunks if c.strip()]
|
| 82 |
load_embedder() # lazy-load once
|
| 83 |
new_vecs = torch.stack([embed(t) for t in docs]).cpu()
|
| 84 |
store = kb[user_id] # auto-creates via defaultdict
|
|
|
|
| 129 |
return prompt
|
| 130 |
|
| 131 |
# ---------- 4. Gradio playground (same UI as before) --------------------------
|
| 132 |
+
def store_doc(doc_text: str,user_id="demo",chunk_size=DEFAULT_CHUNK_SIZE,chunk_overlap=DEFAULT_CHUNK_OVERLAP):
|
|
|
|
| 133 |
try:
|
| 134 |
+
n = add_docs(user_id, [doc_text], chunk_size, chunk_overlap)
|
| 135 |
if n == 0:
|
| 136 |
return "Nothing stored (empty input)."
|
| 137 |
return f"Stored — KB now has {len(kb[user_id]['texts'])} passage(s)."
|
|
|
|
| 139 |
return f"Error during storing: {e}"
|
| 140 |
|
| 141 |
import traceback
|
| 142 |
+
def answer(system: str, context: str, question: str,
|
| 143 |
+
user_id="demo", history="None",
|
| 144 |
+
temperature=DEFAULT_TEMP,
|
| 145 |
+
top_p=DEFAULT_TOP_P,
|
| 146 |
+
top_k_tok=DEFAULT_TOP_K_TOK):
|
| 147 |
"""UI callback: retrieve, build prompt with Qwen tags, generate answer."""
|
| 148 |
try:
|
| 149 |
if not question.strip():
|
|
|
|
| 190 |
**tokens,
|
| 191 |
max_new_tokens=512,
|
| 192 |
max_length=MAX_PROMPT_TOKENS + 512,
|
| 193 |
+
do_sample=True,
|
| 194 |
+
temperature=temperature,
|
| 195 |
+
top_p=top_p,
|
| 196 |
+
top_k=top_k_tok
|
| 197 |
)
|
| 198 |
full = tokenizer.decode(output[0], skip_special_tokens=True)
|
| 199 |
reply = full.split("<|im_start|>assistant")[-1].strip()
|
|
|
|
| 222 |
with gr.Row():
|
| 223 |
passage_box = gr.Textbox(lines=6, label="Reference passage")
|
| 224 |
user_id_box = gr.Textbox(value="demo", label="User ID")
|
| 225 |
+
# Chunking controls, threaded through store_doc() -> add_docs().
chunk_box = gr.Slider(128, 2048, value=DEFAULT_CHUNK_SIZE,
                      step=64, label="Chunk size (chars)")
overlap_box = gr.Slider(0, 1024, value=DEFAULT_CHUNK_OVERLAP,
                        step=32, label="Chunk overlap")
clear_btn = gr.Button("Clear KB")

status_box = gr.Markdown()

# BUG FIX: wire the click *after* status_box exists — the original passed
# `outputs=status_box` before `status_box = gr.Markdown()` was executed,
# raising NameError at import time.
# NOTE(review): store_btn is assumed to be defined earlier in the file —
# it is not visible in this chunk; confirm against the full source.
store_btn.click(fn=store_doc,
                inputs=[passage_box, user_id_box, chunk_box, overlap_box],
                outputs=status_box)
|
|
|
|
| 241 |
|
| 242 |
# ---- Q & A ----
|
| 243 |
question_box = gr.Textbox(lines=2, label="Ask a question")
|
| 244 |
+
# ---- chat inputs ----
history_cb = gr.Textbox(value="None", label="Use chat history")
system_box = gr.Textbox(lines=2, label="System prompt")
context_box = gr.Textbox(lines=6, label="Context passages")

# Sliders controlling token sampling inside the answer() callback.
temp_box = gr.Slider(0.0, 1.5, value=DEFAULT_TEMP, step=0.05, label="Temperature")
topp_box = gr.Slider(0.0, 1.0, value=DEFAULT_TOP_P, step=0.01, label="Top-p")
topk_box = gr.Slider(1, 100, value=DEFAULT_TOP_K_TOK, step=1, label="Top-k (tokens)")

answer_btn = gr.Button("Answer")
answer_box = gr.Textbox(lines=6, label="Assistant reply")
|
| 259 |
|
| 260 |
# Wire the Q&A button: positional order must match answer()'s signature
# (system, context, question, user_id, history, temperature, top_p, top_k).
answer_btn.click(
    fn=answer,
    inputs=[
        system_box, context_box, question_box,
        user_id_box, history_cb,
        temp_box, topp_box, topk_box,
    ],
    outputs=answer_box,
)
|
| 267 |
|