Emmanuel Acheampong Claude Sonnet 4.6 committed on
Commit
d3a0294
·
1 Parent(s): 3862a2c

Update models, rename to Document Analysis, remove broken logo image

Browse files
Files changed (1) hide show
  1. app.py +50 -34
app.py CHANGED
@@ -12,7 +12,16 @@ from openai import OpenAI
12
  # ── Crusoe Foundry client ─────────────────────────────────────────────────────
13
  CRUSOE_API_KEY = os.environ.get("CRUSOE_API_KEY", "YOUR_API_KEY_HERE")
14
  CRUSOE_BASE_URL = os.environ.get("CRUSOE_BASE_URL", "https://managed-inference-api-proxy.crusoecloud.com/v1/")
15
- MODEL = os.environ.get("CRUSOE_MODEL", "llama-3.1-405b-instruct")
 
 
 
 
 
 
 
 
 
16
 
17
  client = OpenAI(api_key=CRUSOE_API_KEY, base_url=CRUSOE_BASE_URL)
18
 
@@ -62,12 +71,13 @@ def get_cache_key(context: str) -> str:
62
 
63
 
64
  # ── Shared chat logic ─────────────────────────────────────────────────────────
65
- def stream_response(system_prompt: str, history: list, user_msg: str):
66
  """
67
  Streams a response from Crusoe Foundry.
68
  Returns (updated_history, token_info_str, latency_str, error_str)
69
  history is a list of {"role": "user"|"assistant", "content": str} dicts (Gradio 6.x format).
70
  """
 
71
  messages = [{"role": "system", "content": system_prompt}]
72
  for msg in history:
73
  messages.append({"role": msg["role"], "content": msg["content"]})
@@ -81,7 +91,7 @@ def stream_response(system_prompt: str, history: list, user_msg: str):
81
  reply = ""
82
  try:
83
  stream = client.chat.completions.create(
84
- model=MODEL,
85
  messages=messages,
86
  stream=True,
87
  max_tokens=2048,
@@ -129,20 +139,20 @@ def legal_ingest(files):
129
  )
130
 
131
 
132
- def legal_chat(user_msg, history):
133
  if not user_msg.strip():
134
  yield history, "—", "—", ""
135
  return
136
  doc_context = legal_doc_store["text"]
137
  system = (
138
- "You are an expert legal analyst with access to the full text of the uploaded documents. "
139
  "Answer questions precisely, citing relevant sections when possible. "
140
  "If a question cannot be answered from the document, say so clearly.\n\n"
141
  f"=== DOCUMENT CONTEXT ===\n{doc_context}\n=== END CONTEXT ==="
142
  if doc_context
143
- else "You are a helpful legal assistant. No documents have been loaded yet."
144
  )
145
- yield from stream_response(system, history, user_msg)
146
 
147
 
148
  # ─────────────────────────────────────────────────────────────────────────────
@@ -167,7 +177,7 @@ def dev_ingest(files, raw_paste):
167
  )
168
 
169
 
170
- def dev_chat(user_msg, history):
171
  if not user_msg.strip():
172
  yield history, "—", "—", ""
173
  return
@@ -180,7 +190,7 @@ def dev_chat(user_msg, history):
180
  if code_context
181
  else "You are a helpful coding assistant. No code has been loaded yet."
182
  )
183
- yield from stream_response(system, history, user_msg)
184
 
185
 
186
  # ─────────────────────────────────────────────────────────────────────────────
@@ -219,7 +229,7 @@ def _render_cache_stats():
219
  )
220
 
221
 
222
- def memory_chat(user_msg, history):
223
  if not user_msg.strip():
224
  yield history, "—", "—", _render_cache_stats(), ""
225
  return
@@ -237,7 +247,7 @@ def memory_chat(user_msg, history):
237
  memory_state["query_count"] += 1
238
  memory_state["total_saved_tokens"] += memory_state["cached_tokens"]
239
 
240
- for history_out, tok_info, latency, err in stream_response(system, history, user_msg):
241
  # Annotate with cache hit badge
242
  cache_badge = "🟢 **Cache HIT (estimated)** — context eligible for KV cache reuse" if cached_ctx else "⚪ No cache"
243
  yield history_out, tok_info, latency, _render_cache_stats(), cache_badge
@@ -263,8 +273,6 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
263
  # ── Header ───────────────────────────────────────────────────────────────
264
  gr.HTML("""
265
  <div class="crusoe-header">
266
- <img src="https://crusoe.ai/wp-content/uploads/2023/09/crusoe-logo.svg"
267
- alt="Crusoe" height="40" style="margin-bottom:0.5rem"/>
268
  <h1 style="font-size:1.8rem;font-weight:700;color:#0D1B2A;margin:0">
269
  Infinite Context Demo
270
  </h1>
@@ -275,12 +283,20 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
275
  </div>
276
  """)
277
 
 
 
 
 
 
 
 
 
278
  with gr.Tabs():
279
 
280
  # ── TAB 1: LEGAL ──────────────────────────────────────────────────────
281
- with gr.Tab("⚖️ Legal Analysis"):
282
  gr.Markdown(
283
- "Upload contracts, briefs, or regulatory documents — ask questions "
284
  "across the **entire document** with no chunking or retrieval needed."
285
  )
286
  with gr.Row():
@@ -300,10 +316,10 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
300
  placeholder="Document text will appear here after loading…",
301
  )
302
  with gr.Column(scale=2):
303
- legal_chatbot = gr.Chatbot(label="Legal Q&A", height=420)
304
  with gr.Row():
305
  legal_input = gr.Textbox(
306
- placeholder="e.g. What are all indemnification carve-outs?",
307
  label="Ask a question",
308
  scale=4,
309
  )
@@ -314,11 +330,11 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
314
  legal_err = gr.Markdown("", visible=False)
315
  gr.Examples(
316
  examples=[
317
- ["What are the termination clauses?"],
318
- ["Summarize all indemnification obligations for each party."],
319
- ["List every deadline or date mentioned in the document."],
320
- ["Are there any non-compete or non-solicitation clauses?"],
321
- ["What happens in the event of a material breach?"],
322
  ],
323
  inputs=legal_input,
324
  )
@@ -329,18 +345,18 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
329
  outputs=[legal_status, legal_token_badge, legal_preview],
330
  )
331
 
332
- def legal_submit(msg, history):
333
- yield from legal_chat(msg, history)
334
 
335
  legal_send.click(
336
  legal_submit,
337
- inputs=[legal_input, legal_chatbot],
338
  outputs=[legal_chatbot, legal_tok_info, legal_latency, legal_err],
339
  ).then(lambda: "", outputs=legal_input)
340
 
341
  legal_input.submit(
342
  legal_submit,
343
- inputs=[legal_input, legal_chatbot],
344
  outputs=[legal_chatbot, legal_tok_info, legal_latency, legal_err],
345
  ).then(lambda: "", outputs=legal_input)
346
 
@@ -401,18 +417,18 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
401
  outputs=[dev_status, dev_token_badge, dev_preview],
402
  )
403
 
404
- def dev_submit(msg, history):
405
- yield from dev_chat(msg, history)
406
 
407
  dev_send.click(
408
  dev_submit,
409
- inputs=[dev_input, dev_chatbot],
410
  outputs=[dev_chatbot, dev_tok_info, dev_latency, dev_err],
411
  ).then(lambda: "", outputs=dev_input)
412
 
413
  dev_input.submit(
414
  dev_submit,
415
- inputs=[dev_input, dev_chatbot],
416
  outputs=[dev_chatbot, dev_tok_info, dev_latency, dev_err],
417
  ).then(lambda: "", outputs=dev_input)
418
 
@@ -473,18 +489,18 @@ with gr.Blocks(title="Crusoe Foundry β€” Infinite Context Demo") as demo:
473
  outputs=[memory_cache_status, memory_stats],
474
  )
475
 
476
- def memory_submit(msg, history):
477
- yield from memory_chat(msg, history)
478
 
479
  memory_send.click(
480
  memory_submit,
481
- inputs=[memory_input, memory_chatbot],
482
  outputs=[memory_chatbot, memory_tok_info, memory_latency, memory_stats, memory_cache_hit],
483
  ).then(lambda: "", outputs=memory_input)
484
 
485
  memory_input.submit(
486
  memory_submit,
487
- inputs=[memory_input, memory_chatbot],
488
  outputs=[memory_chatbot, memory_tok_info, memory_latency, memory_stats, memory_cache_hit],
489
  ).then(lambda: "", outputs=memory_input)
490
 
 
12
  # ── Crusoe Foundry client ─────────────────────────────────────────────────────
13
  CRUSOE_API_KEY = os.environ.get("CRUSOE_API_KEY", "YOUR_API_KEY_HERE")
14
  CRUSOE_BASE_URL = os.environ.get("CRUSOE_BASE_URL", "https://managed-inference-api-proxy.crusoecloud.com/v1/")
15
+ AVAILABLE_MODELS = [
16
+ "Qwen/Qwen3-235B-A22B-Instruct-2507",
17
+ "deepseek-ai/DeepSeek-R1-0528",
18
+ "moonshotai/Kimi-K2-Thinking",
19
+ "deepseek-ai/DeepSeek-V3-0324",
20
+ "meta-llama/Llama-3.3-70B-Instruct",
21
+ "openai/gpt-oss-120b",
22
+ "google/gemma-3-12b-it",
23
+ ]
24
+ MODEL = os.environ.get("CRUSOE_MODEL", AVAILABLE_MODELS[0])
25
 
26
  client = OpenAI(api_key=CRUSOE_API_KEY, base_url=CRUSOE_BASE_URL)
27
 
 
71
 
72
 
73
  # ── Shared chat logic ─────────────────────────────────────────────────────────
74
+ def stream_response(system_prompt: str, history: list, user_msg: str, model: str = None):
75
  """
76
  Streams a response from Crusoe Foundry.
77
  Returns (updated_history, token_info_str, latency_str, error_str)
78
  history is a list of {"role": "user"|"assistant", "content": str} dicts (Gradio 6.x format).
79
  """
80
+ model = model or MODEL
81
  messages = [{"role": "system", "content": system_prompt}]
82
  for msg in history:
83
  messages.append({"role": msg["role"], "content": msg["content"]})
 
91
  reply = ""
92
  try:
93
  stream = client.chat.completions.create(
94
+ model=model,
95
  messages=messages,
96
  stream=True,
97
  max_tokens=2048,
 
139
  )
140
 
141
 
142
+ def legal_chat(user_msg, history, model):
143
  if not user_msg.strip():
144
  yield history, "—", "—", ""
145
  return
146
  doc_context = legal_doc_store["text"]
147
  system = (
148
+ "You are an expert analyst with access to the full text of the uploaded documents. "
149
  "Answer questions precisely, citing relevant sections when possible. "
150
  "If a question cannot be answered from the document, say so clearly.\n\n"
151
  f"=== DOCUMENT CONTEXT ===\n{doc_context}\n=== END CONTEXT ==="
152
  if doc_context
153
+ else "You are a helpful document analyst. No documents have been loaded yet."
154
  )
155
+ yield from stream_response(system, history, user_msg, model)
156
 
157
 
158
  # ─────────────────────────────────────────────────────────────────────────────
 
177
  )
178
 
179
 
180
+ def dev_chat(user_msg, history, model):
181
  if not user_msg.strip():
182
  yield history, "—", "—", ""
183
  return
 
190
  if code_context
191
  else "You are a helpful coding assistant. No code has been loaded yet."
192
  )
193
+ yield from stream_response(system, history, user_msg, model)
194
 
195
 
196
  # ─────────────────────────────────────────────────────────────────────────────
 
229
  )
230
 
231
 
232
+ def memory_chat(user_msg, history, model):
233
  if not user_msg.strip():
234
  yield history, "—", "—", _render_cache_stats(), ""
235
  return
 
247
  memory_state["query_count"] += 1
248
  memory_state["total_saved_tokens"] += memory_state["cached_tokens"]
249
 
250
+ for history_out, tok_info, latency, err in stream_response(system, history, user_msg, model):
251
  # Annotate with cache hit badge
252
  cache_badge = "🟢 **Cache HIT (estimated)** — context eligible for KV cache reuse" if cached_ctx else "⚪ No cache"
253
  yield history_out, tok_info, latency, _render_cache_stats(), cache_badge
 
273
  # ── Header ───────────────────────────────────────────────────────────────
274
  gr.HTML("""
275
  <div class="crusoe-header">
 
 
276
  <h1 style="font-size:1.8rem;font-weight:700;color:#0D1B2A;margin:0">
277
  Infinite Context Demo
278
  </h1>
 
283
  </div>
284
  """)
285
 
286
+ with gr.Row():
287
+ model_selector = gr.Dropdown(
288
+ choices=AVAILABLE_MODELS,
289
+ value=MODEL,
290
+ label="Model",
291
+ scale=2,
292
+ )
293
+
294
  with gr.Tabs():
295
 
296
  # ── TAB 1: LEGAL ──────────────────────────────────────────────────────
297
+ with gr.Tab("📄 Document Analysis"):
298
  gr.Markdown(
299
+ "Upload any documents — ask questions "
300
  "across the **entire document** with no chunking or retrieval needed."
301
  )
302
  with gr.Row():
 
316
  placeholder="Document text will appear here after loading…",
317
  )
318
  with gr.Column(scale=2):
319
+ legal_chatbot = gr.Chatbot(label="Document Q&A", height=420)
320
  with gr.Row():
321
  legal_input = gr.Textbox(
322
+ placeholder="e.g. Summarize the key points of this document.",
323
  label="Ask a question",
324
  scale=4,
325
  )
 
330
  legal_err = gr.Markdown("", visible=False)
331
  gr.Examples(
332
  examples=[
333
+ ["Summarize the key points of this document."],
334
+ ["What are the main topics covered?"],
335
+ ["List every date or deadline mentioned."],
336
+ ["What conclusions or recommendations are made?"],
337
+ ["Extract all named entities (people, organizations, places)."],
338
  ],
339
  inputs=legal_input,
340
  )
 
345
  outputs=[legal_status, legal_token_badge, legal_preview],
346
  )
347
 
348
+ def legal_submit(msg, history, model):
349
+ yield from legal_chat(msg, history, model)
350
 
351
  legal_send.click(
352
  legal_submit,
353
+ inputs=[legal_input, legal_chatbot, model_selector],
354
  outputs=[legal_chatbot, legal_tok_info, legal_latency, legal_err],
355
  ).then(lambda: "", outputs=legal_input)
356
 
357
  legal_input.submit(
358
  legal_submit,
359
+ inputs=[legal_input, legal_chatbot, model_selector],
360
  outputs=[legal_chatbot, legal_tok_info, legal_latency, legal_err],
361
  ).then(lambda: "", outputs=legal_input)
362
 
 
417
  outputs=[dev_status, dev_token_badge, dev_preview],
418
  )
419
 
420
+ def dev_submit(msg, history, model):
421
+ yield from dev_chat(msg, history, model)
422
 
423
  dev_send.click(
424
  dev_submit,
425
+ inputs=[dev_input, dev_chatbot, model_selector],
426
  outputs=[dev_chatbot, dev_tok_info, dev_latency, dev_err],
427
  ).then(lambda: "", outputs=dev_input)
428
 
429
  dev_input.submit(
430
  dev_submit,
431
+ inputs=[dev_input, dev_chatbot, model_selector],
432
  outputs=[dev_chatbot, dev_tok_info, dev_latency, dev_err],
433
  ).then(lambda: "", outputs=dev_input)
434
 
 
489
  outputs=[memory_cache_status, memory_stats],
490
  )
491
 
492
+ def memory_submit(msg, history, model):
493
+ yield from memory_chat(msg, history, model)
494
 
495
  memory_send.click(
496
  memory_submit,
497
+ inputs=[memory_input, memory_chatbot, model_selector],
498
  outputs=[memory_chatbot, memory_tok_info, memory_latency, memory_stats, memory_cache_hit],
499
  ).then(lambda: "", outputs=memory_input)
500
 
501
  memory_input.submit(
502
  memory_submit,
503
+ inputs=[memory_input, memory_chatbot, model_selector],
504
  outputs=[memory_chatbot, memory_tok_info, memory_latency, memory_stats, memory_cache_hit],
505
  ).then(lambda: "", outputs=memory_input)
506