Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import os
|
|
| 2 |
import re
|
| 3 |
import json
|
| 4 |
import gradio as gr
|
| 5 |
-
from typing import List, Dict, Any
|
| 6 |
import requests
|
| 7 |
from datetime import datetime
|
| 8 |
import ast
|
|
@@ -275,13 +275,13 @@ def call_llm(messages: List[Dict], temperature: float = 0.7, max_tokens: int = 5
|
|
| 275 |
except Exception as e:
|
| 276 |
return f"Error calling LLM: {str(e)}"
|
| 277 |
|
| 278 |
-
def think_only_mode(question: str) ->
|
| 279 |
"""Think-Only mode: Chain-of-Thought only, no tools."""
|
| 280 |
prompt = THINK_ONLY_PROMPT.format(question=question)
|
| 281 |
messages = [{"role": "user", "content": prompt}]
|
| 282 |
|
| 283 |
-
|
| 284 |
-
|
| 285 |
|
| 286 |
response = call_llm(messages, temperature=0.7, max_tokens=800)
|
| 287 |
|
|
@@ -289,20 +289,21 @@ def think_only_mode(question: str) -> Generator[str, None, None]:
|
|
| 289 |
for line in lines:
|
| 290 |
if line.strip():
|
| 291 |
if line.strip().startswith('Thought:'):
|
| 292 |
-
|
| 293 |
elif line.strip().startswith('Answer:'):
|
| 294 |
-
|
| 295 |
else:
|
| 296 |
-
|
| 297 |
|
| 298 |
-
|
|
|
|
| 299 |
|
| 300 |
-
def act_only_mode(question: str, max_iterations: int = 5) ->
|
| 301 |
"""Act-Only mode: Tool use only, no explicit thinking."""
|
| 302 |
tool_descriptions = get_tool_descriptions()
|
| 303 |
prompt = ACT_ONLY_PROMPT.format(question=question, tools=tool_descriptions)
|
| 304 |
|
| 305 |
-
|
| 306 |
|
| 307 |
messages = [{"role": "user", "content": prompt}]
|
| 308 |
iteration = 0
|
|
@@ -315,35 +316,36 @@ def act_only_mode(question: str, max_iterations: int = 5) -> Generator[str, None
|
|
| 315 |
if 'Answer:' in response:
|
| 316 |
answer_match = re.search(r'Answer:\s*(.+)', response, re.IGNORECASE | re.DOTALL)
|
| 317 |
if answer_match:
|
| 318 |
-
|
| 319 |
break
|
| 320 |
|
| 321 |
action_name, action_input = parse_action(response)
|
| 322 |
|
| 323 |
if action_name and action_input:
|
| 324 |
-
|
| 325 |
-
|
| 326 |
|
| 327 |
observation = call_tool(action_name, action_input)
|
| 328 |
-
|
| 329 |
|
| 330 |
messages.append({"role": "assistant", "content": response})
|
| 331 |
messages.append({"role": "user", "content": f"Observation: {observation}\n\nContinue with another action or provide the final answer."})
|
| 332 |
else:
|
| 333 |
-
|
| 334 |
break
|
| 335 |
|
| 336 |
if iteration >= max_iterations:
|
| 337 |
-
|
| 338 |
|
| 339 |
-
|
|
|
|
| 340 |
|
| 341 |
-
def react_mode(question: str, max_iterations: int = 5) ->
|
| 342 |
"""ReAct mode: Interleaving Thought, Action, Observation."""
|
| 343 |
tool_descriptions = get_tool_descriptions()
|
| 344 |
prompt = REACT_PROMPT.format(question=question, tools=tool_descriptions)
|
| 345 |
|
| 346 |
-
|
| 347 |
|
| 348 |
messages = [{"role": "user", "content": prompt}]
|
| 349 |
iteration = 0
|
|
@@ -355,34 +357,35 @@ def react_mode(question: str, max_iterations: int = 5) -> Generator[str, None, N
|
|
| 355 |
|
| 356 |
thought_matches = re.findall(r'Thought:\s*(.+?)(?=\n(?:Action:|Answer:|$))', response, re.IGNORECASE | re.DOTALL)
|
| 357 |
for thought in thought_matches:
|
| 358 |
-
|
| 359 |
|
| 360 |
if 'Answer:' in response:
|
| 361 |
answer_match = re.search(r'Answer:\s*(.+)', response, re.IGNORECASE | re.DOTALL)
|
| 362 |
if answer_match:
|
| 363 |
-
|
| 364 |
break
|
| 365 |
|
| 366 |
action_name, action_input = parse_action(response)
|
| 367 |
|
| 368 |
if action_name and action_input:
|
| 369 |
-
|
| 370 |
-
|
| 371 |
|
| 372 |
observation = call_tool(action_name, action_input)
|
| 373 |
-
|
| 374 |
|
| 375 |
messages.append({"role": "assistant", "content": response})
|
| 376 |
messages.append({"role": "user", "content": f"Observation: {observation}\n\nThought:"})
|
| 377 |
else:
|
| 378 |
if 'Answer:' not in response:
|
| 379 |
-
|
| 380 |
break
|
| 381 |
|
| 382 |
if iteration >= max_iterations:
|
| 383 |
-
|
| 384 |
|
| 385 |
-
|
|
|
|
| 386 |
|
| 387 |
EXAMPLES = [
|
| 388 |
"What is the capital of France and what's the current weather there?",
|
|
@@ -406,6 +409,37 @@ def run_comparison(question: str, mode: str):
|
|
| 406 |
else:
|
| 407 |
return "Invalid mode selected.", "", ""
|
| 408 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
with gr.Blocks(title="LLM Reasoning Modes Comparison") as demo:
|
| 410 |
gr.Markdown("""
|
| 411 |
# LLM Reasoning Modes Comparison
|
|
@@ -431,7 +465,9 @@ with gr.Blocks(title="LLM Reasoning Modes Comparison") as demo:
|
|
| 431 |
value="All (Compare)",
|
| 432 |
label="Select Mode"
|
| 433 |
)
|
| 434 |
-
|
|
|
|
|
|
|
| 435 |
|
| 436 |
with gr.Column(scale=1):
|
| 437 |
gr.Markdown("**Example Questions**")
|
|
@@ -451,11 +487,22 @@ with gr.Blocks(title="LLM Reasoning Modes Comparison") as demo:
|
|
| 451 |
with gr.Column():
|
| 452 |
react_output = gr.Markdown(label="ReAct Output")
|
| 453 |
|
|
|
|
|
|
|
| 454 |
submit_btn.click(
|
| 455 |
fn=run_comparison,
|
| 456 |
inputs=[question_input, mode_dropdown],
|
| 457 |
outputs=[think_output, act_output, react_output]
|
| 458 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
|
| 460 |
if __name__ == "__main__":
|
| 461 |
demo.launch()
|
|
|
|
| 2 |
import re
|
| 3 |
import json
|
| 4 |
import gradio as gr
|
| 5 |
+
from typing import List, Dict, Any
|
| 6 |
import requests
|
| 7 |
from datetime import datetime
|
| 8 |
import ast
|
|
|
|
| 275 |
except Exception as e:
|
| 276 |
return f"Error calling LLM: {str(e)}"
|
| 277 |
|
| 278 |
+
def think_only_mode(question: str) -> str:
|
| 279 |
"""Think-Only mode: Chain-of-Thought only, no tools."""
|
| 280 |
prompt = THINK_ONLY_PROMPT.format(question=question)
|
| 281 |
messages = [{"role": "user", "content": prompt}]
|
| 282 |
|
| 283 |
+
output = "**Mode: Think-Only (Chain-of-Thought)**\n\n"
|
| 284 |
+
output += "Generating thoughts...\n\n"
|
| 285 |
|
| 286 |
response = call_llm(messages, temperature=0.7, max_tokens=800)
|
| 287 |
|
|
|
|
| 289 |
for line in lines:
|
| 290 |
if line.strip():
|
| 291 |
if line.strip().startswith('Thought:'):
|
| 292 |
+
output += f"**{line.strip()}**\n\n"
|
| 293 |
elif line.strip().startswith('Answer:'):
|
| 294 |
+
output += f"**{line.strip()}**\n\n"
|
| 295 |
else:
|
| 296 |
+
output += f"{line}\n\n"
|
| 297 |
|
| 298 |
+
output += "\n---\n**Mode completed**\n"
|
| 299 |
+
return output
|
| 300 |
|
| 301 |
+
def act_only_mode(question: str, max_iterations: int = 5) -> str:
|
| 302 |
"""Act-Only mode: Tool use only, no explicit thinking."""
|
| 303 |
tool_descriptions = get_tool_descriptions()
|
| 304 |
prompt = ACT_ONLY_PROMPT.format(question=question, tools=tool_descriptions)
|
| 305 |
|
| 306 |
+
output = "**Mode: Act-Only (Tool Use Only)**\n\n"
|
| 307 |
|
| 308 |
messages = [{"role": "user", "content": prompt}]
|
| 309 |
iteration = 0
|
|
|
|
| 316 |
if 'Answer:' in response:
|
| 317 |
answer_match = re.search(r'Answer:\s*(.+)', response, re.IGNORECASE | re.DOTALL)
|
| 318 |
if answer_match:
|
| 319 |
+
output += f"**Answer:** {answer_match.group(1).strip()}\n\n"
|
| 320 |
break
|
| 321 |
|
| 322 |
action_name, action_input = parse_action(response)
|
| 323 |
|
| 324 |
if action_name and action_input:
|
| 325 |
+
output += f"**Action:** {action_name}\n"
|
| 326 |
+
output += f"**Action Input:** {action_input}\n\n"
|
| 327 |
|
| 328 |
observation = call_tool(action_name, action_input)
|
| 329 |
+
output += f"**Observation:** {observation}\n\n"
|
| 330 |
|
| 331 |
messages.append({"role": "assistant", "content": response})
|
| 332 |
messages.append({"role": "user", "content": f"Observation: {observation}\n\nContinue with another action or provide the final answer."})
|
| 333 |
else:
|
| 334 |
+
output += f"Could not parse action from response. Response: {response}\n\n"
|
| 335 |
break
|
| 336 |
|
| 337 |
if iteration >= max_iterations:
|
| 338 |
+
output += "**Reached maximum iterations.**\n\n"
|
| 339 |
|
| 340 |
+
output += "\n---\n**Mode completed**\n"
|
| 341 |
+
return output
|
| 342 |
|
| 343 |
+
def react_mode(question: str, max_iterations: int = 5) -> str:
|
| 344 |
"""ReAct mode: Interleaving Thought, Action, Observation."""
|
| 345 |
tool_descriptions = get_tool_descriptions()
|
| 346 |
prompt = REACT_PROMPT.format(question=question, tools=tool_descriptions)
|
| 347 |
|
| 348 |
+
output = "**Mode: ReAct (Thought + Action + Observation)**\n\n"
|
| 349 |
|
| 350 |
messages = [{"role": "user", "content": prompt}]
|
| 351 |
iteration = 0
|
|
|
|
| 357 |
|
| 358 |
thought_matches = re.findall(r'Thought:\s*(.+?)(?=\n(?:Action:|Answer:|$))', response, re.IGNORECASE | re.DOTALL)
|
| 359 |
for thought in thought_matches:
|
| 360 |
+
output += f"**Thought:** {thought.strip()}\n\n"
|
| 361 |
|
| 362 |
if 'Answer:' in response:
|
| 363 |
answer_match = re.search(r'Answer:\s*(.+)', response, re.IGNORECASE | re.DOTALL)
|
| 364 |
if answer_match:
|
| 365 |
+
output += f"**Answer:** {answer_match.group(1).strip()}\n\n"
|
| 366 |
break
|
| 367 |
|
| 368 |
action_name, action_input = parse_action(response)
|
| 369 |
|
| 370 |
if action_name and action_input:
|
| 371 |
+
output += f"**Action:** {action_name}\n"
|
| 372 |
+
output += f"**Action Input:** {action_input}\n\n"
|
| 373 |
|
| 374 |
observation = call_tool(action_name, action_input)
|
| 375 |
+
output += f"**Observation:** {observation}\n\n"
|
| 376 |
|
| 377 |
messages.append({"role": "assistant", "content": response})
|
| 378 |
messages.append({"role": "user", "content": f"Observation: {observation}\n\nThought:"})
|
| 379 |
else:
|
| 380 |
if 'Answer:' not in response:
|
| 381 |
+
output += f"No action found. Response: {response}\n\n"
|
| 382 |
break
|
| 383 |
|
| 384 |
if iteration >= max_iterations:
|
| 385 |
+
output += "**Reached maximum iterations.**\n\n"
|
| 386 |
|
| 387 |
+
output += "\n---\n**Mode completed**\n"
|
| 388 |
+
return output
|
| 389 |
|
| 390 |
EXAMPLES = [
|
| 391 |
"What is the capital of France and what's the current weather there?",
|
|
|
|
| 409 |
else:
|
| 410 |
return "Invalid mode selected.", "", ""
|
| 411 |
|
| 412 |
+
def download_results(think_output: str, act_output: str, react_output: str, question: str):
    """Create a downloadable text file with all mode results.

    Args:
        think_output: Markdown output of Think-Only mode ("" if not run).
        act_output: Markdown output of Act-Only mode ("" if not run).
        react_output: Markdown output of ReAct mode ("" if not run).
        question: The question the modes were run against.

    Returns:
        Path of the written UTF-8 text file. The name is stamped with the
        current time so successive downloads do not overwrite each other.
    """
    # Local import: only this function needs it.
    import tempfile

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"reasoning_comparison_{timestamp}.txt"

    separator = "=" * 80
    content = f"""LLM Reasoning Modes Comparison Results
Generated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
Question: {question}

{separator}
THINK-ONLY MODE
{separator}
{think_output if think_output else "Not executed"}

{separator}
ACT-ONLY MODE
{separator}
{act_output if act_output else "Not executed"}

{separator}
REACT MODE
{separator}
{react_output if react_output else "Not executed"}
"""

    # Bug fix: `filename` was computed but never used — the path was a
    # hard-coded literal, so every download clobbered the same file.
    # tempfile.gettempdir() also works on platforms without /tmp.
    filepath = os.path.join(tempfile.gettempdir(), filename)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(content)

    return filepath
|
| 442 |
+
|
| 443 |
with gr.Blocks(title="LLM Reasoning Modes Comparison") as demo:
|
| 444 |
gr.Markdown("""
|
| 445 |
# LLM Reasoning Modes Comparison
|
|
|
|
| 465 |
value="All (Compare)",
|
| 466 |
label="Select Mode"
|
| 467 |
)
|
| 468 |
+
with gr.Row():
|
| 469 |
+
submit_btn = gr.Button("Run", variant="primary", size="lg")
|
| 470 |
+
download_btn = gr.Button("Download Results", variant="secondary", size="lg")
|
| 471 |
|
| 472 |
with gr.Column(scale=1):
|
| 473 |
gr.Markdown("**Example Questions**")
|
|
|
|
| 487 |
with gr.Column():
|
| 488 |
react_output = gr.Markdown(label="ReAct Output")
|
| 489 |
|
| 490 |
+
download_file = gr.File(label="Download", visible=False)
|
| 491 |
+
|
| 492 |
submit_btn.click(
|
| 493 |
fn=run_comparison,
|
| 494 |
inputs=[question_input, mode_dropdown],
|
| 495 |
outputs=[think_output, act_output, react_output]
|
| 496 |
)
|
| 497 |
+
|
| 498 |
+
download_btn.click(
|
| 499 |
+
fn=download_results,
|
| 500 |
+
inputs=[think_output, act_output, react_output, question_input],
|
| 501 |
+
outputs=download_file
|
| 502 |
+
).then(
|
| 503 |
+
fn=lambda: gr.File(visible=True),
|
| 504 |
+
outputs=download_file
|
| 505 |
+
)
|
| 506 |
|
| 507 |
if __name__ == "__main__":
|
| 508 |
demo.launch()
|