Emmanuel Acheampong Claude Sonnet 4.6 committed on
Commit
d3a0294
·
1 Parent(s): 3862a2c

Update models, rename to Document Analysis, remove broken logo image

Browse files
Files changed (1) hide show
  1. app.py +50 -34
app.py CHANGED
@@ -12,7 +12,16 @@ from openai import OpenAI
12
  # ── Crusoe Foundry client ─────────────────────────────────────────────────────
13
  CRUSOE_API_KEY = os.environ.get("CRUSOE_API_KEY", "YOUR_API_KEY_HERE")
14
  CRUSOE_BASE_URL = os.environ.get("CRUSOE_BASE_URL", "https://managed-inference-api-proxy.crusoecloud.com/v1/")
15
- MODEL = os.environ.get("CRUSOE_MODEL", "llama-3.1-405b-instruct")
 
 
 
 
 
 
 
 
 
16
 
17
  client = OpenAI(api_key=CRUSOE_API_KEY, base_url=CRUSOE_BASE_URL)
18
 
@@ -62,12 +71,13 @@ def get_cache_key(context: str) -> str:
62
 
63
 
64
  # ── Shared chat logic ─────────────────────────────────────────────────────────
65
- def stream_response(system_prompt: str, history: list, user_msg: str):
66
  """
67
  Streams a response from Crusoe Foundry.
68
  Returns (updated_history, token_info_str, latency_str, error_str)
69
  history is a list of {"role": "user"|"assistant", "content": str} dicts (Gradio 6.x format).
70
  """
 
71
  messages = [{"role": "system", "content": system_prompt}]
72
  for msg in history:
73
  messages.append({"role": msg["role"], "content": msg["content"]})
@@ -81,7 +91,7 @@ def stream_response(system_prompt: str, history: list, user_msg: str):
81
  reply = ""
82
  try:
83
  stream = client.chat.completions.create(
84
- model=MODEL,
85
  messages=messages,
86
  stream=True,
87
  max_tokens=2048,
@@ -129,20 +139,20 @@ def legal_ingest(files):
129
  )
130
 
131
 
132
- def legal_chat(user_msg, history):
133
  if not user_msg.strip():
134
  yield history, "—", "—", ""
135
  return
136
  doc_context = legal_doc_store["text"]
137
  system = (
138
- "You are an expert legal analyst with access to the full text of the uploaded documents. "
139
  "Answer questions precisely, citing relevant sections when possible. "
140
  "If a question cannot be answered from the document, say so clearly.\n\n"
141
  f"=== DOCUMENT CONTEXT ===\n{doc_context}\n=== END CONTEXT ==="
142
  if doc_context
143
- else "You are a helpful legal assistant. No documents have been loaded yet."
144
  )
145
- yield from stream_response(system, history, user_msg)
146
 
147
 
148
  # ─────────────────────────────────────────────────────────────────────────────
@@ -167,7 +177,7 @@ def dev_ingest(files, raw_paste):
167
  )
168
 
169
 
170
- def dev_chat(user_msg, history):
171
  if not user_msg.strip():
172
  yield history, "—", "—", ""
173
  return
@@ -180,7 +190,7 @@ def dev_chat(user_msg, history):
180
  if code_context
181
  else "You are a helpful coding assistant. No code has been loaded yet."
182
  )
183
- yield from stream_response(system, history, user_msg)
184
 
185
 
186
  # ─────────────────────────────────────────────────────────────────────────────
@@ -219,7 +229,7 @@ def _render_cache_stats():
219
  )
220
 
221
 
222
- def memory_chat(user_msg, history):
223
  if not user_msg.strip():
224
  yield history, "—", "—", _render_cache_stats(), ""
225
  return
@@ -237,7 +247,7 @@ def memory_chat(user_msg, history):
237
  memory_state["query_count"] += 1
238
  memory_state["total_saved_tokens"] += memory_state["cached_tokens"]
239
 
240
- for history_out, tok_info, latency, err in stream_response(system, history, user_msg):
241
  # Annotate with cache hit badge
242
  cache_badge = "🟢 **Cache HIT (estimated)** — context eligible for KV cache reuse" if cached_ctx else "⚪ No cache"
243
  yield history_out, tok_info, latency, _render_cache_stats(), cache_badge
@@ -263,8 +273,6 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
263
  # ── Header ───────────────────────────────────────────────────────────────
264
  gr.HTML("""
265
  <div class="crusoe-header">
266
- <img src="https://crusoe.ai/wp-content/uploads/2023/09/crusoe-logo.svg"
267
- alt="Crusoe" height="40" style="margin-bottom:0.5rem"/>
268
  <h1 style="font-size:1.8rem;font-weight:700;color:#0D1B2A;margin:0">
269
  Infinite Context Demo
270
  </h1>
@@ -275,12 +283,20 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
275
  </div>
276
  """)
277
 
 
 
 
 
 
 
 
 
278
  with gr.Tabs():
279
 
280
  # ── TAB 1: LEGAL ──────────────────────────────────────────────────────
281
- with gr.Tab("⚖️ Legal Analysis"):
282
  gr.Markdown(
283
- "Upload contracts, briefs, or regulatory documents — ask questions "
284
  "across the **entire document** with no chunking or retrieval needed."
285
  )
286
  with gr.Row():
@@ -300,10 +316,10 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
300
  placeholder="Document text will appear here after loading…",
301
  )
302
  with gr.Column(scale=2):
303
- legal_chatbot = gr.Chatbot(label="Legal Q&A", height=420)
304
  with gr.Row():
305
  legal_input = gr.Textbox(
306
- placeholder="e.g. What are all indemnification carve-outs?",
307
  label="Ask a question",
308
  scale=4,
309
  )
@@ -314,11 +330,11 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
314
  legal_err = gr.Markdown("", visible=False)
315
  gr.Examples(
316
  examples=[
317
- ["What are the termination clauses?"],
318
- ["Summarize all indemnification obligations for each party."],
319
- ["List every deadline or date mentioned in the document."],
320
- ["Are there any non-compete or non-solicitation clauses?"],
321
- ["What happens in the event of a material breach?"],
322
  ],
323
  inputs=legal_input,
324
  )
@@ -329,18 +345,18 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
329
  outputs=[legal_status, legal_token_badge, legal_preview],
330
  )
331
 
332
- def legal_submit(msg, history):
333
- yield from legal_chat(msg, history)
334
 
335
  legal_send.click(
336
  legal_submit,
337
- inputs=[legal_input, legal_chatbot],
338
  outputs=[legal_chatbot, legal_tok_info, legal_latency, legal_err],
339
  ).then(lambda: "", outputs=legal_input)
340
 
341
  legal_input.submit(
342
  legal_submit,
343
- inputs=[legal_input, legal_chatbot],
344
  outputs=[legal_chatbot, legal_tok_info, legal_latency, legal_err],
345
  ).then(lambda: "", outputs=legal_input)
346
 
@@ -401,18 +417,18 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
401
  outputs=[dev_status, dev_token_badge, dev_preview],
402
  )
403
 
404
- def dev_submit(msg, history):
405
- yield from dev_chat(msg, history)
406
 
407
  dev_send.click(
408
  dev_submit,
409
- inputs=[dev_input, dev_chatbot],
410
  outputs=[dev_chatbot, dev_tok_info, dev_latency, dev_err],
411
  ).then(lambda: "", outputs=dev_input)
412
 
413
  dev_input.submit(
414
  dev_submit,
415
- inputs=[dev_input, dev_chatbot],
416
  outputs=[dev_chatbot, dev_tok_info, dev_latency, dev_err],
417
  ).then(lambda: "", outputs=dev_input)
418
 
@@ -473,18 +489,18 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
473
  outputs=[memory_cache_status, memory_stats],
474
  )
475
 
476
- def memory_submit(msg, history):
477
- yield from memory_chat(msg, history)
478
 
479
  memory_send.click(
480
  memory_submit,
481
- inputs=[memory_input, memory_chatbot],
482
  outputs=[memory_chatbot, memory_tok_info, memory_latency, memory_stats, memory_cache_hit],
483
  ).then(lambda: "", outputs=memory_input)
484
 
485
  memory_input.submit(
486
  memory_submit,
487
- inputs=[memory_input, memory_chatbot],
488
  outputs=[memory_chatbot, memory_tok_info, memory_latency, memory_stats, memory_cache_hit],
489
  ).then(lambda: "", outputs=memory_input)
490
 
 
12
  # ── Crusoe Foundry client ─────────────────────────────────────────────────────
13
  CRUSOE_API_KEY = os.environ.get("CRUSOE_API_KEY", "YOUR_API_KEY_HERE")
14
  CRUSOE_BASE_URL = os.environ.get("CRUSOE_BASE_URL", "https://managed-inference-api-proxy.crusoecloud.com/v1/")
15
+ AVAILABLE_MODELS = [
16
+ "Qwen/Qwen3-235B-A22B-Instruct-2507",
17
+ "deepseek-ai/DeepSeek-R1-0528",
18
+ "moonshotai/Kimi-K2-Thinking",
19
+ "deepseek-ai/DeepSeek-V3-0324",
20
+ "meta-llama/Llama-3.3-70B-Instruct",
21
+ "openai/gpt-oss-120b",
22
+ "google/gemma-3-12b-it",
23
+ ]
24
+ MODEL = os.environ.get("CRUSOE_MODEL", AVAILABLE_MODELS[0])
25
 
26
  client = OpenAI(api_key=CRUSOE_API_KEY, base_url=CRUSOE_BASE_URL)
27
 
 
71
 
72
 
73
  # ── Shared chat logic ─────────────────────────────────────────────────────────
74
+ def stream_response(system_prompt: str, history: list, user_msg: str, model: str = None):
75
  """
76
  Streams a response from Crusoe Foundry.
77
  Returns (updated_history, token_info_str, latency_str, error_str)
78
  history is a list of {"role": "user"|"assistant", "content": str} dicts (Gradio 6.x format).
79
  """
80
+ model = model or MODEL
81
  messages = [{"role": "system", "content": system_prompt}]
82
  for msg in history:
83
  messages.append({"role": msg["role"], "content": msg["content"]})
 
91
  reply = ""
92
  try:
93
  stream = client.chat.completions.create(
94
+ model=model,
95
  messages=messages,
96
  stream=True,
97
  max_tokens=2048,
 
139
  )
140
 
141
 
142
+ def legal_chat(user_msg, history, model):
143
  if not user_msg.strip():
144
  yield history, "—", "—", ""
145
  return
146
  doc_context = legal_doc_store["text"]
147
  system = (
148
+ "You are an expert analyst with access to the full text of the uploaded documents. "
149
  "Answer questions precisely, citing relevant sections when possible. "
150
  "If a question cannot be answered from the document, say so clearly.\n\n"
151
  f"=== DOCUMENT CONTEXT ===\n{doc_context}\n=== END CONTEXT ==="
152
  if doc_context
153
+ else "You are a helpful document analyst. No documents have been loaded yet."
154
  )
155
+ yield from stream_response(system, history, user_msg, model)
156
 
157
 
158
  # ─────────────────────────────────────────────────────────────────────────────
 
177
  )
178
 
179
 
180
+ def dev_chat(user_msg, history, model):
181
  if not user_msg.strip():
182
  yield history, "—", "—", ""
183
  return
 
190
  if code_context
191
  else "You are a helpful coding assistant. No code has been loaded yet."
192
  )
193
+ yield from stream_response(system, history, user_msg, model)
194
 
195
 
196
  # ─────────────────────────────────────────────────────────────────────────────
 
229
  )
230
 
231
 
232
+ def memory_chat(user_msg, history, model):
233
  if not user_msg.strip():
234
  yield history, "—", "—", _render_cache_stats(), ""
235
  return
 
247
  memory_state["query_count"] += 1
248
  memory_state["total_saved_tokens"] += memory_state["cached_tokens"]
249
 
250
+ for history_out, tok_info, latency, err in stream_response(system, history, user_msg, model):
251
  # Annotate with cache hit badge
252
  cache_badge = "🟢 **Cache HIT (estimated)** — context eligible for KV cache reuse" if cached_ctx else "⚪ No cache"
253
  yield history_out, tok_info, latency, _render_cache_stats(), cache_badge
 
273
  # ── Header ───────────────────────────────────────────────────────────────
274
  gr.HTML("""
275
  <div class="crusoe-header">
 
 
276
  <h1 style="font-size:1.8rem;font-weight:700;color:#0D1B2A;margin:0">
277
  Infinite Context Demo
278
  </h1>
 
283
  </div>
284
  """)
285
 
286
+ with gr.Row():
287
+ model_selector = gr.Dropdown(
288
+ choices=AVAILABLE_MODELS,
289
+ value=MODEL,
290
+ label="Model",
291
+ scale=2,
292
+ )
293
+
294
  with gr.Tabs():
295
 
296
  # ── TAB 1: LEGAL ──────────────────────────────────────────────────────
297
+ with gr.Tab("📄 Document Analysis"):
298
  gr.Markdown(
299
+ "Upload any documents — ask questions "
300
  "across the **entire document** with no chunking or retrieval needed."
301
  )
302
  with gr.Row():
 
316
  placeholder="Document text will appear here after loading…",
317
  )
318
  with gr.Column(scale=2):
319
+ legal_chatbot = gr.Chatbot(label="Document Q&A", height=420)
320
  with gr.Row():
321
  legal_input = gr.Textbox(
322
+ placeholder="e.g. Summarize the key points of this document.",
323
  label="Ask a question",
324
  scale=4,
325
  )
 
330
  legal_err = gr.Markdown("", visible=False)
331
  gr.Examples(
332
  examples=[
333
+ ["Summarize the key points of this document."],
334
+ ["What are the main topics covered?"],
335
+ ["List every date or deadline mentioned."],
336
+ ["What conclusions or recommendations are made?"],
337
+ ["Extract all named entities (people, organizations, places)."],
338
  ],
339
  inputs=legal_input,
340
  )
 
345
  outputs=[legal_status, legal_token_badge, legal_preview],
346
  )
347
 
348
+ def legal_submit(msg, history, model):
349
+ yield from legal_chat(msg, history, model)
350
 
351
  legal_send.click(
352
  legal_submit,
353
+ inputs=[legal_input, legal_chatbot, model_selector],
354
  outputs=[legal_chatbot, legal_tok_info, legal_latency, legal_err],
355
  ).then(lambda: "", outputs=legal_input)
356
 
357
  legal_input.submit(
358
  legal_submit,
359
+ inputs=[legal_input, legal_chatbot, model_selector],
360
  outputs=[legal_chatbot, legal_tok_info, legal_latency, legal_err],
361
  ).then(lambda: "", outputs=legal_input)
362
 
 
417
  outputs=[dev_status, dev_token_badge, dev_preview],
418
  )
419
 
420
+ def dev_submit(msg, history, model):
421
+ yield from dev_chat(msg, history, model)
422
 
423
  dev_send.click(
424
  dev_submit,
425
+ inputs=[dev_input, dev_chatbot, model_selector],
426
  outputs=[dev_chatbot, dev_tok_info, dev_latency, dev_err],
427
  ).then(lambda: "", outputs=dev_input)
428
 
429
  dev_input.submit(
430
  dev_submit,
431
+ inputs=[dev_input, dev_chatbot, model_selector],
432
  outputs=[dev_chatbot, dev_tok_info, dev_latency, dev_err],
433
  ).then(lambda: "", outputs=dev_input)
434
 
 
489
  outputs=[memory_cache_status, memory_stats],
490
  )
491
 
492
+ def memory_submit(msg, history, model):
493
+ yield from memory_chat(msg, history, model)
494
 
495
  memory_send.click(
496
  memory_submit,
497
+ inputs=[memory_input, memory_chatbot, model_selector],
498
  outputs=[memory_chatbot, memory_tok_info, memory_latency, memory_stats, memory_cache_hit],
499
  ).then(lambda: "", outputs=memory_input)
500
 
501
  memory_input.submit(
502
  memory_submit,
503
+ inputs=[memory_input, memory_chatbot, model_selector],
504
  outputs=[memory_chatbot, memory_tok_info, memory_latency, memory_stats, memory_cache_hit],
505
  ).then(lambda: "", outputs=memory_input)
506