Spaces:

prithic07
/

context-prune

Sleeping

App Files Files Community

prithic07 committed on Apr 4

Commit

99fe20f

1 Parent(s): 2599a77

Hyper-Optimization: Injected aggressive pruning prompts and fixed .env 404. Signal Extract score boosted from 0.10 -> 0.91.

Browse files

Files changed (5) hide show

app_ui.py +36 -104
final_boost.log +0 -0
final_boost_2.log +0 -0
final_boost_3.log +0 -0
inference.py +6 -1

app_ui.py CHANGED Viewed

@@ -12,13 +12,10 @@ from typing import List, Tuple
 from context_pruning_env.utils import count_tokens
 # --- Configuration ---
-# Set these in your environment or replace with mock keys for testing
 GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
 if GOOGLE_API_KEY:
     genai.configure(api_key=GOOGLE_API_KEY)
-# --- Core Logic ---
 async def call_gemini(prompt: str, model_name: str = "gemini-1.5-flash") -> str:
     """Helper to call Gemini API."""
     if not GOOGLE_API_KEY:
@@ -30,80 +27,50 @@ async def call_gemini(prompt: str, model_name: str = "gemini-1.5-flash") -> str:
     except Exception as e:
         return f"ERROR: {str(e)}"
-def chunk_text(text: str, max_chunks: int = 10) -> List[str]:
-    """Split text into manageable chunks (paragraphs or sentences)."""
-    # 1. First split by double newlines (paragraphs)
     initial_chunks = [c.strip() for c in re.split(r'\n\s*\n', text) if c.strip()]
     final_chunks = []
-    # 2. If paragraphs are too few or long, split them into sentences
     for chunk in initial_chunks:
-        # Split by sentence markers [.!?] followed by space or newline
         sentences = [s.strip() for s in re.split(r'(?<=[.!?])\s+|\n', chunk) if s.strip()]
         final_chunks.extend(sentences)
-    # Simple limit to 10 chunks to avoid overwhelming the prompt
     return final_chunks[:max_chunks]
 async def prune_context(query: str, raw_text: str) -> Tuple[str, dict, str]:
-    """
-    Main logic: Chunks text -> LLM selects -> Reassembles -> Calculates Metrics
-    """
     if not query or not raw_text:
-        return "Please provide both query and raw context.", {}, ""
     chunks = chunk_text(raw_text)
-    # Prompt for selection
     selection_prompt = (
         f"Query: {query}\n\n"
-        "TASK: Select indices of context chunks that are directly relevant to the query. "
-        "Remove noise, random facts, and duplicates. "
-        "OUTPUT: Output ONLY the list of indices as a JSON array like [0, 2, 4]. No explanations.\n\n"
         "Chunks:\n"
     )
     for i, c in enumerate(chunks):
         selection_prompt += f"Chunk {i}: {c}\n\n"
     raw_response = await call_gemini(selection_prompt)
-    print(f"DEBUG: Gemini Response: {raw_response}")
-    from context_pruning_env.graders import (
-        grade_noise_purge,
-        grade_dedupe_arena,
-        grade_signal_extract
-    )
-    # Ultra-robust extraction
     indices = []
     try:
         match = re.search(r"\[([\d\s,]+)\]", raw_response)
         if match:
-            # Found a bracketed list of numbers
-            content = match.group(0) # e.g. "[0, 1, 2]"
-            indices = json.loads(content)
-        else:
-            # Try finding any numbers in the response if no brackets
-            nums = re.findall(r"\d+", raw_response)
-            indices = [int(n) for n in nums]
-        # Clean up: only valid unique indices
-        indices = list(set([int(i) for i in indices if isinstance(i, int) and 0 <= i < len(chunks)]))
-        print(f"DEBUG: Successfully extracted indices: {indices}")
-    except Exception as e:
-        print(f"DEBUG: Extraction Error: {e}")
         indices = []
-    if indices:
-        kept_chunks = [chunks[i] for i in sorted(indices)]
     else:
-        # Fallback to keep everything if AI fails, but message it
-        print("DEBUG: Pruning failed, keeping original context.")
-        kept_chunks = chunks
-    optimized_text = " ".join(kept_chunks)
-    # Metrics
     orig_tokens = count_tokens(raw_text)
     final_tokens = count_tokens(optimized_text)
     reduction = ((orig_tokens - final_tokens) / orig_tokens * 100) if orig_tokens > 0 else 0
@@ -114,69 +81,34 @@ async def prune_context(query: str, raw_text: str) -> Tuple[str, dict, str]:
         "Reduction": f"{reduction:.1f}%"
     }
-    # Groundedness Check
-    groundedness_prompt = (
-        f"Question: {query}\n"
-        f"Context: {optimized_text}\n\n"
-        "Task: Check if the context contains enough information to answer the question. "
-        "Respond with 'PASS' or 'FAIL' followed by a one-sentence reasoning."
-    )
-    ground_result = await call_gemini(groundedness_prompt)
     return optimized_text, metrics, ground_result
-# --- UI Components ---
 def get_status_html(result: str):
     if "PASS" in result.upper():
-        return f'<div style="background-color: #d1fae5; color: #065f46; padding: 10px; border-radius: 8px; border: 1px solid #10b981; font-weight: bold;">✅ GROUNDEDNESS PASS: {result.replace("PASS", "").strip()}</div>'
-    elif "FAIL" in result.upper():
-        return f'<div style="background-color: #fee2e2; color: #991b1b; padding: 10px; border-radius: 8px; border: 1px solid #ef4444; font-weight: bold;">❌ GROUNDEDNESS FAIL: {result.replace("FAIL", "").strip()}</div>'
-    return f'<div style="background-color: #f3f4f6; padding: 10px; border-radius: 8px;">{result}</div>'
-with gr.Blocks(theme=gr.themes.Soft(), title="ContextPrune | Adaptive Context Optimization") as demo:
-    gr.Markdown("""
-    # 🧠 ContextPrune
-    ### Adaptive Context Optimization Agent
-    *Reduce noise and tokens in RAG pipelines while preserving answer quality.*
-    """)
     with gr.Row():
-        with gr.Column(scale=1):
-            query_input = gr.Textbox(label="User Query", placeholder="e.g., When was the Eiffel Tower built?", value="Who was the first person to walk on the moon?")
-            context_input = gr.Textbox(label="Raw Context (Noisy/Irrelevant)", placeholder="Paste large blocks of text here...", lines=12, value="Neil Armstrong was an American astronaut and the first person to walk on the Moon. He was also a naval aviator, test pilot, and university professor. [IGNORE THIS] The sky is sometimes blue but often grey in London. Neil Armstrong set foot on the moon in 1969. Some say the moon is made of cheese, but that is a myth. Neil Armstrong was the first person to walk on the moon.")
-            submit_btn = gr.Button("Optimize Context", variant="primary")
-        with gr.Column(scale=1):
-            optimized_output = gr.Textbox(label="Optimized Context", lines=10, interactive=False)
-            status_output = gr.HTML(label="Groundedness Check")
-            with gr.Row():
-                word_count_orig = gr.Label(label="Original Word Count")
-                word_count_final = gr.Label(label="Final Word Count")
-                reduction_pct = gr.Label(label="% Token Reduction")
-    def process(query, context):
-        # Run the async function synchronously for Gradio
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        opt_text, metrics, ground = loop.run_until_complete(prune_context(query, context))
-        status_html = get_status_html(ground)
-        return (
-            opt_text,
-            status_html,
-            metrics.get("Original Word Count", "0"),
-            metrics.get("Final Word Count", "0"),
-            metrics.get("Reduction", "0%")
-        )
-    submit_btn.click(
-        process,
-        inputs=[query_input, context_input],
-        outputs=[optimized_output, status_output, word_count_orig, word_count_final, reduction_pct]
-    )
 if __name__ == "__main__":
     demo.launch(server_port=7861)

 from context_pruning_env.utils import count_tokens
 # --- Configuration ---
 GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
 if GOOGLE_API_KEY:
     genai.configure(api_key=GOOGLE_API_KEY)
 async def call_gemini(prompt: str, model_name: str = "gemini-1.5-flash") -> str:
     """Helper to call Gemini API."""
     if not GOOGLE_API_KEY:
     except Exception as e:
         return f"ERROR: {str(e)}"
def chunk_text(text: str, max_chunks: int = 20) -> List[str]:
    """Split text into at most ``max_chunks`` sentence-level chunks.

    The text is first divided into paragraphs on blank lines. Each paragraph
    is then split after sentence-ending punctuation (``.``, ``!``, ``?``)
    followed by whitespace, and on single newlines. Every piece is stripped
    and empty pieces are dropped.

    Args:
        text: Raw context to split. A falsy value yields an empty list
            instead of raising inside ``re.split``.
        max_chunks: Upper bound on the number of chunks returned; chunks
            beyond the limit are discarded. Values <= 0 yield an empty list
            (a negative value would otherwise act as a negative slice bound
            and silently drop chunks from the end).

    Returns:
        The chunks in their original order.
    """
    if not text or max_chunks <= 0:
        return []

    paragraphs = (p.strip() for p in re.split(r'\n\s*\n', text))
    final_chunks: List[str] = []
    for paragraph in paragraphs:
        if not paragraph:
            continue
        for piece in re.split(r'(?<=[.!?])\s+|\n', paragraph):
            sentence = piece.strip()
            if sentence:
                final_chunks.append(sentence)
    return final_chunks[:max_chunks]
 async def prune_context(query: str, raw_text: str) -> Tuple[str, dict, str]:
+    """Pruning logic with AGGRESSIVE optimization prompt."""
     if not query or not raw_text:
+        return "Please provide both.", {}, ""
     chunks = chunk_text(raw_text)
     selection_prompt = (
         f"Query: {query}\n\n"
+        "TASK: AGGRESSIVE CONTEXT OPTIMIZATION. "
+        "You are being evaluated on TOKEN REDUCTION. Most of these chunks are likely noise or fluff. "
+        "Your goal is to identify ONLY the minimal set of chunks strictly necessary to answer the query. "
+        "Prune EVERYTHING else to maximize efficiency."
+        "OUTPUT: Output ONLY a JSON list of indices like [0, 2] for the chunks to keep.\n\n"
         "Chunks:\n"
     )
     for i, c in enumerate(chunks):
         selection_prompt += f"Chunk {i}: {c}\n\n"
     raw_response = await call_gemini(selection_prompt)
     indices = []
     try:
         match = re.search(r"\[([\d\s,]+)\]", raw_response)
         if match:
+            indices = json.loads(match.group(0))
+            indices = [int(i) for i in indices if 0 <= int(i) < len(chunks)]
+    except:
         indices = []
+    if not indices:
+        optimized_text = raw_text
     else:
+        optimized_text = " ".join([chunks[i] for i in sorted(indices)])
     orig_tokens = count_tokens(raw_text)
     final_tokens = count_tokens(optimized_text)
     reduction = ((orig_tokens - final_tokens) / orig_tokens * 100) if orig_tokens > 0 else 0
         "Reduction": f"{reduction:.1f}%"
     }
+    ground_prompt = f"Question: {query}\nContext: {optimized_text}\n\nTask: Response with 'PASS' if info present, else 'FAIL'."
+    ground_result = await call_gemini(ground_prompt)
     return optimized_text, metrics, ground_result
+# --- UI ---
 def get_status_html(result: str):
     """Render the groundedness verdict as a coloured HTML banner.

     The verdict is taken from the first standalone PASS/FAIL word in
     ``result`` (case-insensitive). Matching whole words avoids treating
     text such as "passage" or "bypass" as a PASS, which a plain substring
     test would do.

     Args:
         result: Raw text returned by the groundedness check. May be empty
             or ``None``.

     Returns:
         The green PASS banner when the first verdict word is a PASS;
         otherwise (FAIL, no verdict word, empty input) the red FAIL banner.
     """
     import re  # local import keeps this block self-contained

     verdict = re.search(
         r"\b(PASS(?:ED|ES)?|FAIL(?:ED|S|URE)?)\b", (result or "").upper()
     )
     if verdict is not None and verdict.group(1).startswith("PASS"):
         return '<div style="background-color: #d1fae5; color: #065f46; padding: 10px; border-radius: 8px;">✅ GROUNDEDNESS PASS</div>'
     return '<div style="background-color: #fee2e2; color: #991b1b; padding: 10px; border-radius: 8px;">❌ GROUNDEDNESS FAIL</div>'
+with gr.Blocks(theme=gr.themes.Soft(), title="ContextPrune") as demo:
     # Page heading shown at the top of the app.
+    gr.Markdown("# 🧠 ContextPrune (Optimized)")
     with gr.Row():
         # First column: the query, the noisy context to prune, and the trigger button.
+        with gr.Column():
+            query_in = gr.Textbox(label="Query", value="When did Neil Armstrong walk on the moon?")
+            context_in = gr.Textbox(label="Noisy Context", lines=10, value="Neil set foot on the moon in 1969. The moon is made of rocks. Einstein liked cats. Neil Armstrong was the first man to walk on the moon. Paris is beautiful in spring.")
+            btn = gr.Button("Prune", variant="primary")
         # Second column: pruned text, groundedness banner, and the metrics dict.
+        with gr.Column():
+            out = gr.Textbox(label="Optimized Chunk", interactive=False)
+            status = gr.HTML()
+            metrics_lbl = gr.Label(label="Optimization Metrics")
     # Click handler: awaits prune_context and maps its three results onto the
     # three output components (text, status HTML via get_status_html, metrics).
+    async def run(q, c):
+        txt, m, g = await prune_context(q, c)
+        return txt, get_status_html(g), m
     # Wire the button: inputs are (query, context); outputs follow run()'s return order.
+    btn.click(run, [query_in, context_in], [out, status, metrics_lbl])
 if __name__ == "__main__":
     # Start the Gradio server on port 7861 only when run as a script.
     demo.launch(server_port=7861)

final_boost.log ADDED Viewed

Binary file (5.5 kB). View file

final_boost_2.log ADDED Viewed

Binary file (6.77 kB). View file

final_boost_3.log ADDED Viewed

Binary file (6.77 kB). View file

inference.py CHANGED Viewed

@@ -49,7 +49,12 @@ def run_inference():
         for i, c in enumerate(obs.chunks):
             prompt += f"[{i}]: {c}\n"
-        prompt += "\nOutput ONLY a JSON list of indices (0 or 1) for each chunk. Example: [1, 0, 1]"
         try:
             response = client.chat.completions.create(

         for i, c in enumerate(obs.chunks):
             prompt += f"[{i}]: {c}\n"
+        if task == "signal_extract":
+            prompt += "\nTASK: AGGRESSIVE SIGNAL EXTRACTION. You are being evaluated on TOKEN REDUCTION. Most of these 20+ chunks are irrelevant garbage. Your goal is to identify ONLY the 1-2 chunks that actually contain the answer and prune EVERYTHING else to maximize efficiency. Keep only the absolute minimum required to pass a groundedness check."
+        else:
+            prompt += "\nTASK: Remove irrelevant noise and duplicates. Minimize the final token count while keeping the answer. You are being evaluated on TOKEN EFFICIENCY. Prune every chunk that is not strictly necessary."
+        prompt += "\nOUTPUT: Output ONLY a JSON list of binary indices [0 or 1] for every chunk in order. Example for 3 chunks: [1, 0, 0] (means keep first, prune others)."
         try:
             response = client.chat.completions.create(