Spaces:

ayushm98
/

codepilot

Running

ayushm98 committed 27 days ago

Commit

85d7785

1 Parent(s): 0887cf4

Add clean UI with agent progress, copyable code, and results table

New v3.4.0 features:
- Live agent progress table (Explorer → Planner → Coder → Reviewer)
- Status icons: ✅ done, ⏳ working, ⬜ waiting
- Copyable code blocks with syntax highlighting
- Review results table showing pass/fail for each step
- Cost summary displayed throughout

Files changed (2) hide show

Dockerfile +1 -1
chainlit_app.py +320 -385

Dockerfile CHANGED Viewed

@@ -1,5 +1,5 @@
 # HuggingFace Spaces Dockerfile for CodePilot
-# BUILD_VERSION: 16 (v3.3.7 validate-all - validate all messages for tool_use/result pairs)
 FROM python:3.11-slim
 # Set working directory

 # HuggingFace Spaces Dockerfile for CodePilot
+# BUILD_VERSION: 17 (v3.4.0 clean-ui - clean progress display, copyable code, results table)
 FROM python:3.11-slim
 # Set working directory

chainlit_app.py CHANGED Viewed

@@ -2,17 +2,19 @@
 Chainlit UI for CodePilot Multi-Agent System
 This provides a chat interface showing detailed agent workflow:
 - Planner creates implementation plans
-- Coder writes code, uploads to sandbox, runs tests
 - Reviewer checks and approves code
-User can see every step in real-time.
 """
 import chainlit as cl
 import os
 import sys
 import io
 from contextlib import redirect_stdout, redirect_stderr
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
@@ -20,8 +22,8 @@ from concurrent.futures import ThreadPoolExecutor
 # ============================================================
 # STARTUP VERSION CHECK - Change this to detect if rebuild worked
 # ============================================================
-APP_VERSION = "3.3.7-validate-all"
-BUILD_ID = "2024-12-19-v15"
 print("=" * 60)
 print(f"[STARTUP] CodePilot Chainlit App")
 print(f"[STARTUP] APP_VERSION: {APP_VERSION}")
@@ -47,31 +49,186 @@ from codepilot.tools.github_tools import (
 )
-# Authentication disabled for now - uncomment to enable password protection
-# @cl.password_auth_callback
-# def auth_callback(username: str, password: str):
-#     """
-#     Simple password authentication for CodePilot.
-#
-#     For production, use environment variables and proper password hashing.
-#     """
-#     # Get password from environment variable (more secure)
-#     required_password = os.getenv('CHAINLIT_PASSWORD', 'codepilot2024')
-#
-#     # In production, you should hash passwords and use a proper auth system
-#     if password == required_password:
-#         return cl.User(
-#             identifier=username,
-#             metadata={"role": "user", "provider": "credentials"}
-#         )
-#     return None
 @cl.on_chat_start
 async def start():
     """Initialize the agent system when chat starts."""
-    print("[CHAINLIT] on_chat_start triggered")  # Debug log
     await cl.Message(
         content=f"# CodePilot - Autonomous AI Coding Agent\n\n"
@@ -80,20 +237,17 @@ async def start():
                 "**How to use:**\n"
                 "1. Paste a **public GitHub URL** and I'll clone and analyze it\n"
                 "2. Tell me what you want to build or fix\n"
-                "3. Watch my agents (Planner > Coder > Reviewer) work!\n\n"
                 "**Example:**\n"
-                "```\nAnalyze https://github.com/user/repo and add error handling to the API endpoints\n```\n\n"
-                "**Ready!** Paste a GitHub URL or describe your task."
     ).send()
-    print("[CHAINLIT] Welcome message sent")  # Debug log
     # Initialize session variables
     cl.user_session.set("repo_path", None)
     cl.user_session.set("repo_info", None)
-    # Skip self-indexing - agents will only work with cloned GitHub repos
-    # Create orchestrator and mark as ready
     cl.user_session.set("orchestrator", Orchestrator(max_iterations=10))
     cl.user_session.set("ready", True)
     print("[CHAINLIT] Orchestrator created, ready for GitHub repos")
@@ -102,115 +256,138 @@ async def start():
 @cl.on_chat_end
 async def end():
     """Cleanup when chat ends."""
-    # Clean up any cloned repositories
     repo_path = cl.user_session.get("repo_path")
     if repo_path:
         print(f"[CHAINLIT] Cleaning up repo: {repo_path}")
         cleanup_repository(repo_path)
 @cl.on_message
 async def main(message: cl.Message):
     """Handle user messages and run the agent workflow."""
-    # Check if ready
     if not cl.user_session.get("ready"):
         await cl.Message(content="System is still initializing, please wait...").send()
         return
-    # Get orchestrator
     orchestrator: Orchestrator = cl.user_session.get("orchestrator")
-    # Check if we're waiting for clarification answers
-    # If NOT waiting for clarification, this is a NEW task - reset orchestrator
-    if not cl.user_session.get("waiting_for_clarification"):
-        # Create fresh orchestrator for new tasks
-        orchestrator = Orchestrator(max_iterations=10)
-        cl.user_session.set("orchestrator", orchestrator)
-        print("[CHAINLIT] Created fresh orchestrator for new task")
     if cl.user_session.get("waiting_for_clarification"):
         cl.user_session.set("waiting_for_clarification", False)
         user_answers = message.content
-        await cl.Message(content="Got it! Let me create the plan with your clarifications...").send()
-        # Resume the orchestrator with user answers
-        log_msg = cl.Message(content="")
-        await log_msg.send()
-        try:
-            captured_output = io.StringIO()
-            def resume_orchestrator():
-                with redirect_stdout(captured_output), redirect_stderr(captured_output):
-                    return orchestrator.resume_after_clarification(user_answers)
-            loop = asyncio.get_event_loop()
-            executor = ThreadPoolExecutor(max_workers=1)
-            future = loop.run_in_executor(executor, resume_orchestrator)
-            # Track tokens
-            total_prompt_tokens = 0
-            total_completion_tokens = 0
-            total_tokens = 0
-            seen_token_lines = set()
-            # Stream logs
-            accumulated_logs = ""
-            while not future.done():
-                await asyncio.sleep(0.5)
-                current_output = captured_output.getvalue()
-                if current_output != accumulated_logs:
-                    accumulated_logs = current_output
-                    filtered_lines = []
-                    for line in accumulated_logs.split('\n'):
-                        if 'Tokens:' in line and line not in seen_token_lines:
-                            seen_token_lines.add(line)
-                            try:
-                                parts = line.split('Tokens:')[1].strip()
-                                prompt = int(parts.split('prompt')[0].strip())
-                                completion = int(parts.split('+')[1].split('completion')[0].strip())
-                                total_prompt_tokens += prompt
-                                total_completion_tokens += completion
-                                total_tokens += (prompt + completion)
-                            except:
-                                pass
-                        if any(skip in line for skip in ['Tokens:', 'Batches:', '|##', 'it/s]']):
-                            continue
-                        if any(keep in line for keep in [
-                            '[CLASSIFIER]', '[ORCHESTRATOR]', '[PLANNER]', '[CODER]', '[REVIEWER]',
-                            '[EXPLORER]', 'Calling tool:', 'Transitioning', 'APPROVED', 'REJECTED'
-                        ]):
-                            filtered_lines.append(line)
-                    filtered_output = '\n'.join(filtered_lines)
-                    input_cost = (total_prompt_tokens / 1000000) * 3.0
-                    output_cost = (total_completion_tokens / 1000000) * 15.0
-                    total_cost = input_cost + output_cost
-                    usage_summary = f"\n\nCREDITS: ${total_cost:.4f}"
-                    log_msg.content = f"```\n{filtered_output}{usage_summary}\n```"
-                    await log_msg.update()
-            result = await future
-            # Continue to show results (handled by falling through to normal result handling below)
-            # For now, show summary directly
-            summary = f"## Result\n**Status:** {result.get('status')}\n"
             if result.get('code_changes'):
-                summary += f"**Files created:** {len(result['code_changes'])}\n"
-            summary += f"**Cost:** ${total_cost:.4f}"
-            await cl.Message(content=summary).send()
-            return
-        except Exception as e:
-            await cl.Message(content=f"Error resuming: {str(e)}").send()
-            return
-    # Check for GitHub URL in message
     github_url = extract_github_url(message.content)
     task_context = ""
     if github_url:
-        # Clone the repository
-        clone_msg = await cl.Message(content=f"Cloning repository: `{github_url}`...").send()
         success, result, repo_name = clone_repository(github_url)
@@ -218,24 +395,16 @@ async def main(message: cl.Message):
             repo_path = result
             repo_info = get_repo_info(repo_path)
-            # Store in session
             cl.user_session.set("repo_path", repo_path)
             cl.user_session.set("repo_info", repo_info)
-            # Index the repository for search (full BM25 + embeddings)
             try:
-                index_result = index_codebase(repo_path)
-                print(f"[CHAINLIT] Repository indexed: {index_result}")
             except Exception as e:
-                print(f"[CHAINLIT] Indexing failed (non-critical): {e}")
-            # Create context for the task (limited to avoid token overflow)
             languages = ", ".join(repo_info["languages"][:5]) if repo_info["languages"] else "Unknown"
-            # Only include first 20 files to keep context small
-            sample_files = repo_info["files"][:20] if repo_info["files"] else []
-            files_preview = "\n".join(f"  - {f}" for f in sample_files)
-            if len(repo_info["files"]) > 20:
-                files_preview += f"\n  ... and {len(repo_info['files']) - 20} more files"
             task_context = f"""
 [REPOSITORY CONTEXT]
@@ -243,30 +412,19 @@ Repository: {repo_name}
 Path: {repo_path}
 Total Files: {repo_info['total_files']}
 Languages: {languages}
-Sample Files:
-{files_preview}
 AVAILABLE TOOLS:
-- search_repository: Search this cloned repository using BM25 keyword matching (use this to find functions, classes, or code patterns in the Flask repo)
-- read_file: Read a specific file (use full path: {repo_path}/filename.py)
-- search_code: Grep for exact pattern matches in the repository
 """
-            # Update clone message
-            clone_msg.content = f"**Repository cloned successfully!**\n\n" \
-                               f"- **Name:** {repo_name}\n" \
-                               f"- **Files:** {repo_info['total_files']}\n" \
-                               f"- **Languages:** {languages}\n" \
-                               f"- **Path:** `{repo_path}`"
             await clone_msg.update()
         else:
-            # Clone failed
-            clone_msg.content = f"**Failed to clone repository**\n\n{result}\n\n" \
-                               f"Make sure the repository is public and the URL is correct."
             await clone_msg.update()
             return
-    # Check if we have a repo from previous message
     elif cl.user_session.get("repo_path"):
         repo_path = cl.user_session.get("repo_path")
         repo_info = cl.user_session.get("repo_info")
@@ -280,266 +438,43 @@ Total Files: {repo_info['total_files']}
 Languages: {languages}
 AVAILABLE TOOLS:
-- search_repository: Search this cloned repository using BM25 keyword matching (use this to find functions, classes, or code patterns in the Flask repo)
-- read_file: Read a specific file (use full path: {repo_path}/filename.py)
-- search_code: Grep for exact pattern matches in the repository
 """
-    # Prepare the full task with context
-    # Remove the GitHub URL from the message to get just the user's query
     user_query = message.content
-    print(f"[DEBUG] Original message.content: '{message.content}'")
-    print(f"[DEBUG] GitHub URL found: '{github_url}'")
     if github_url:
-        # Remove the URL from the message to get the actual task
-        import re
         user_query = re.sub(r'https?://github\.com/[^\s]+', '', user_query).strip()
-        print(f"[DEBUG] After URL removal: '{user_query}'")
     full_task = task_context + "\n\n" + user_query if task_context else user_query
-    print(f"[DEBUG] task_context exists: {bool(task_context)}")
-    print(f"[DEBUG] task_context length: {len(task_context) if task_context else 0}")
-    print(f"[DEBUG] Final user_query: '{user_query}'")
-    print(f"[DEBUG] Full task (first 500 chars): '{full_task[:500]}...'")
-    # Create a message for streaming logs
-    log_msg = cl.Message(content="")
-    await log_msg.send()
-    try:
-        # Capture stdout/stderr to stream logs
-        captured_output = io.StringIO()
-        def run_orchestrator():
-            """Run orchestrator in thread and capture output."""
-            try:
-                with redirect_stdout(captured_output), redirect_stderr(captured_output):
-                    return orchestrator.run(full_task)
-            except Exception as e:
-                # Capture any exceptions from orchestrator
-                print(f"Error in orchestrator: {str(e)}")
-                import traceback
-                traceback.print_exc()
-                raise
-        # Run in thread pool to avoid blocking
-        loop = asyncio.get_event_loop()
-        executor = ThreadPoolExecutor(max_workers=1)
-        # Start the orchestrator in background
-        future = loop.run_in_executor(executor, run_orchestrator)
-        # Track API usage
-        total_prompt_tokens = 0
-        total_completion_tokens = 0
-        total_tokens = 0
-        seen_token_lines = set()  # Track which token lines we've already counted
-        # Stream logs while orchestrator is running - FILTERED
-        accumulated_logs = ""
-        while not future.done():
-            await asyncio.sleep(0.5)  # Check every 500ms
-            # Get new output
-            current_output = captured_output.getvalue()
-            if current_output != accumulated_logs:
-                accumulated_logs = current_output
-                # Filter logs to show only important lines
-                filtered_lines = []
-                for line in accumulated_logs.split('\n'):
-                    # Extract token usage before filtering (only count each line once!)
-                    if 'Tokens:' in line and line not in seen_token_lines:
-                        seen_token_lines.add(line)  # Mark as counted
-                        try:
-                            # Parse: "Tokens: 505 prompt + 20 completion = 525 total"
-                            parts = line.split('Tokens:')[1].strip()
-                            prompt = int(parts.split('prompt')[0].strip())
-                            completion = int(parts.split('+')[1].split('completion')[0].strip())
-                            total_prompt_tokens += prompt
-                            total_completion_tokens += completion
-                            total_tokens += (prompt + completion)
-                        except:
-                            pass
-                    # Skip token counts, progress bars, and verbose details
-                    if any(skip in line for skip in ['Tokens:', 'Batches:', '|##', 'it/s]']):
-                        continue
-                    # Keep important lines
-                    if any(keep in line for keep in [
-                        '[CLASSIFIER]', '[ORCHESTRATOR]', '[PLANNER]', '[CODER]', '[REVIEWER]',
-                        '[EXPLORER]', 'Calling tool:', 'Tool', 'Transitioning', 'APPROVED', 'REJECTED',
-                        '[GITHUB]', 'Cloning', 'Repository'
-                    ]):
-                        filtered_lines.append(line)
-                filtered_output = '\n'.join(filtered_lines)
-                # Calculate cost (Claude Sonnet 4.5 pricing: $3/1M input, $15/1M output)
-                input_cost = (total_prompt_tokens / 1000000) * 3.0
-                output_cost = (total_completion_tokens / 1000000) * 15.0
-                total_cost = input_cost + output_cost
-                # Add usage summary to logs
-                usage_summary = f"\n\nCREDITS USED:\n"
-                usage_summary += f"  Input:  {total_prompt_tokens:,} tokens (${input_cost:.4f})\n"
-                usage_summary += f"  Output: {total_completion_tokens:,} tokens (${output_cost:.4f})\n"
-                usage_summary += f"  Total:  {total_tokens:,} tokens (${total_cost:.4f})"
-                # Update message with filtered logs + usage
-                log_msg.content = f"```\n{filtered_output}\n{usage_summary}\n```"
-                await log_msg.update()
-        # Get final result
-        result = await future
-        # Get final logs
-        final_logs = captured_output.getvalue()
-        # Update with final logs
-        log_msg.content = f"## Execution Log\n```\n{final_logs}\n```"
-        await log_msg.update()
-        # Check if we need clarification from user
-        if result.get('status') == 'clarifying' and result.get('clarifying_questions'):
-            questions = result['clarifying_questions']
-            # Store that we're waiting for clarification
-            cl.user_session.set("waiting_for_clarification", True)
-            await cl.Message(
-                content=f"## Before I proceed, I have some questions:\n\n{questions}\n\n"
-                        f"**Please answer the questions above so I can create a better plan.**"
-            ).send()
-            return  # Wait for user to respond
-        # Send results summary
-        summary_lines = []
-        if result.get('plan'):
-            summary_lines.append("## Planner")
-            summary_lines.append(f"Plan created ({len(result['plan'])} chars)\n")
-        if result.get('code_changes'):
-            summary_lines.append("## Coder")
-            summary_lines.append(f"Created {len(result['code_changes'])} file(s):")
-            for file_path in result['code_changes'].keys():
-                summary_lines.append(f"  - {file_path}")
-            summary_lines.append("")
-        if result.get('review_feedback'):
-            summary_lines.append("## Reviewer")
-            if result.get('success'):
-                summary_lines.append("Code approved")
-            else:
-                summary_lines.append("Needs revision")
-            summary_lines.append("")
-        summary_lines.append("## Result")
-        if result.get('success'):
-            summary_lines.append(f"**Success** (Iterations: {result.get('iterations', 'N/A')})")
-        else:
-            summary_lines.append(f"**Incomplete** (Iterations: {result.get('iterations', 'N/A')})")
-        # Add final cost summary (Claude Sonnet 4.5 pricing: $3/1M input, $15/1M output)
-        summary_lines.append("\n## API Credits Used (Claude Sonnet 4.5)")
-        summary_lines.append(f"**Total Tokens:** {total_tokens:,}")
-        summary_lines.append(f"- Input: {total_prompt_tokens:,} tokens (${(total_prompt_tokens/1000000)*3.0:.4f})")
-        summary_lines.append(f"- Output: {total_completion_tokens:,} tokens (${(total_completion_tokens/1000000)*15.0:.4f})")
-        summary_lines.append(f"\n**Estimated Cost:** ${total_cost:.4f}")
-        await cl.Message(content="\n".join(summary_lines)).send()
-    except Exception as e:
-        # Determine error type and provide specific guidance
-        error_message = str(e)
-        error_type = type(e).__name__
-        if "rate_limit" in error_message.lower() or "429" in error_message:
-            user_message = f"""## Rate Limit Reached
-Claude API rate limit exceeded. This happens when too many requests are made in a short time.
-**What to do:**
-- Wait a few minutes and try again
-- Reduce max_iterations (currently: {orchestrator.max_iterations})
-- Your request will work once the rate limit resets
-**Error details:**
-```
-{error_message}
-```
-"""
-        elif "insufficient_quota" in error_message.lower() or "credit" in error_message.lower():
-            user_message = f"""## API Credits Exhausted
-Your Anthropic API credits have been exhausted.
-**What to do:**
-- Add credits to your Anthropic account at https://console.anthropic.com/settings/billing
-- Check your usage at https://console.anthropic.com/settings/usage
-- Current model: Claude Sonnet 4.5 (~$0.20 per task)
-**Error details:**
-```
-{error_message}
-```
-"""
-        elif "api_key" in error_message.lower() or "authentication" in error_message.lower():
-            user_message = f"""## API Key Error
-There's an issue with your Anthropic API key.
-**What to do:**
-- Verify your ANTHROPIC_API_KEY in .env file
-- Check that the key is valid at https://console.anthropic.com/settings/keys
-- Restart the application after updating .env
-**Error details:**
-```
-{error_message}
-```
-"""
-        elif "timeout" in error_message.lower():
-            user_message = f"""## Request Timeout
-The operation took too long and timed out.
-**What to do:**
-- Try again with a simpler task
-- The task may be too complex for one iteration
-- Consider breaking it into smaller steps
-**Error details:**
-```
-{error_message}
-```
-"""
-        else:
-            # Generic error with helpful context
-            user_message = f"""## Error Occurred
-An unexpected error occurred during execution.
-**Error type:** {error_type}
-**What to do:**
-- Try rephrasing your request
-- Check if all required files/dependencies exist
-- Verify your .env file has all required API keys
-**Error details:**
-```
-{error_message}
-```
-If this persists, please report the issue with the error details above.
-"""
-        await cl.Message(content=user_message).send()
 if __name__ == "__main__":
-    import sys
     sys.exit("Run with: chainlit run chainlit_app.py")

 Chainlit UI for CodePilot Multi-Agent System
 This provides a chat interface showing detailed agent workflow:
+- Explorer searches the codebase
 - Planner creates implementation plans
+- Coder writes code
 - Reviewer checks and approves code
+User sees clean progress updates and copyable code output.
 """
 import chainlit as cl
 import os
 import sys
 import io
+import re
 from contextlib import redirect_stdout, redirect_stderr
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
 # ============================================================
 # STARTUP VERSION CHECK - Change this to detect if rebuild worked
 # ============================================================
+APP_VERSION = "3.4.0-clean-ui"
+BUILD_ID = "2024-12-20-v1"
 print("=" * 60)
 print(f"[STARTUP] CodePilot Chainlit App")
 print(f"[STARTUP] APP_VERSION: {APP_VERSION}")
 )
# Maps a lowercased file suffix to the Markdown fence language identifier.
# Built once at import time instead of on every call.
_EXTENSION_LANGUAGES = {
    '.py': 'python',
    '.js': 'javascript',
    '.mjs': 'javascript',
    '.cjs': 'javascript',
    '.ts': 'typescript',
    '.jsx': 'jsx',
    '.tsx': 'tsx',
    '.html': 'html',
    '.htm': 'html',
    '.css': 'css',
    '.scss': 'scss',
    '.json': 'json',
    '.md': 'markdown',
    '.yml': 'yaml',
    '.yaml': 'yaml',
    '.toml': 'toml',
    '.xml': 'xml',
    '.sql': 'sql',
    '.sh': 'bash',
    '.bash': 'bash',
    '.rs': 'rust',
    '.go': 'go',
    '.java': 'java',
    '.kt': 'kotlin',
    '.swift': 'swift',
    '.rb': 'ruby',
    '.php': 'php',
    '.c': 'c',
    '.h': 'c',
    '.cpp': 'cpp',
    '.hpp': 'cpp',
    '.cs': 'csharp',
}

# Well-known files that carry no suffix, keyed by lowercased basename.
_FILENAME_LANGUAGES = {
    'dockerfile': 'dockerfile',
    'makefile': 'makefile',
}


def get_file_extension(file_path: str) -> str:
    """Return the Markdown fence language identifier for ``file_path``.

    Despite the name, the return value is a language tag such as
    ``'python'`` (not the extension itself), suitable for placing directly
    after an opening code fence.

    Args:
        file_path: Path or bare file name. Matching is case-insensitive.

    Returns:
        The language identifier, or ``''`` when the path is empty/``None``
        or the file type is not recognised (an empty tag renders as a plain
        code block).
    """
    if not file_path:
        return ''
    suffix = os.path.splitext(file_path)[1].lower()
    if suffix:
        return _EXTENSION_LANGUAGES.get(suffix, '')
    # No suffix: fall back to well-known bare file names such as Dockerfile.
    return _FILENAME_LANGUAGES.get(os.path.basename(file_path).lower(), '')
def format_code_output(code_changes: dict) -> str:
    """Format code changes as copyable Markdown code blocks.

    Args:
        code_changes: Mapping of file path -> file content. ``None`` content
            is rendered as an empty block; other non-string content is
            converted with ``str()``.

    Returns:
        One Markdown section per file (heading, full path, fenced code
        block), joined with newlines, or ``"No code changes."`` when the
        mapping is empty or ``None``.
    """
    if not code_changes:
        return "No code changes."

    output = []
    for file_path, content in code_changes.items():
        text = "" if content is None else str(content)
        # Show only the file name in the heading; the full path follows.
        filename = os.path.basename(file_path)
        lang = get_file_extension(file_path)

        # A fence is closed by any run of at least as many backticks, so a
        # file that itself contains ``` (e.g. Markdown or docstrings) would
        # end the block early. Grow the fence until it no longer occurs in
        # the content.
        fence = "```"
        while fence in text:
            fence += "`"

        output.append(f"### `{filename}`")
        output.append(f"**Full path:** `{file_path}`")
        output.append(f"{fence}{lang}")
        output.append(text)
        output.append(fence)
        output.append("")
    return "\n".join(output)
def parse_agent_status(logs: str) -> dict:
    """Parse captured orchestrator logs into a progress snapshot.

    Each line is attributed to the first agent tag it contains
    (``[EXPLORER]``, ``[PLANNER]``, ``[CODER]``, ``[REVIEWER]``). Review
    verdicts (``APPROVED`` / ``REJECTED``) are recognised on untagged lines
    and on ``[REVIEWER]`` lines, so a verdict that shares a line with the
    reviewer tag is not lost. ``Transitioning to ...`` markers are
    recognised on lines that carry no agent tag.

    Args:
        logs: Full captured log text; ``None`` or empty yields the defaults.

    Returns:
        Dict with keys ``current_agent`` (last agent seen, or ``None``),
        ``explorer_done``, ``planner_done``, ``coder_done``,
        ``reviewer_done`` (bools), ``approved`` (``True``/``False``, or
        ``None`` when no verdict was seen) and ``tools_called`` (list of
        ``"<Agent>: <tool>"`` strings in log order).
    """
    status = {
        'current_agent': None,
        'explorer_done': False,
        'planner_done': False,
        'coder_done': False,
        'reviewer_done': False,
        'approved': None,
        'tools_called': [],
    }

    # Checked in this order; the first tag found on a line wins.
    agent_tags = (
        ('[EXPLORER]', 'Explorer'),
        ('[PLANNER]', 'Planner'),
        ('[CODER]', 'Coder'),
        ('[REVIEWER]', 'Reviewer'),
    )
    # Entering a state means the stage before it has finished.
    transitions = {
        'Transitioning to CLARIFYING': 'explorer_done',
        'Transitioning to PLANNING': 'explorer_done',
        'Transitioning to CODING': 'planner_done',
        'Transitioning to REVIEWING': 'coder_done',
        'Transitioning to COMPLETE': 'reviewer_done',
    }

    for line in (logs or '').split('\n'):
        agent = next((name for tag, name in agent_tags if tag in line), None)

        if agent is not None:
            status['current_agent'] = agent
            # Planner lines are not scanned for tool calls.
            if agent != 'Planner' and 'Calling tool:' in line:
                tool = line.split('Calling tool:')[1].strip()
                status['tools_called'].append(f"{agent}: {tool}")
            if agent == 'Planner' and 'Plan created' in line:
                status['planner_done'] = True
            if agent == 'Coder' and 'Finished implementation' in line:
                status['coder_done'] = True
            if agent != 'Reviewer':
                # Only the reviewer's own lines (or untagged lines) may
                # carry a verdict; skip the checks below for other agents.
                continue

        if 'APPROVED' in line:
            status['approved'] = True
            status['reviewer_done'] = True
        elif 'REJECTED' in line:
            status['approved'] = False
            status['reviewer_done'] = True
        elif agent is None:
            for marker, done_key in transitions.items():
                if marker in line:
                    status[done_key] = True
                    break
    return status
def format_progress_display(status: dict, total_cost: float) -> str:
    """Render the agent progress table and running cost as Markdown.

    Args:
        status: Snapshot with the keys ``current_agent``, ``explorer_done``,
            ``planner_done``, ``coder_done``, ``reviewer_done`` and
            (optionally) ``approved``.
        total_cost: Cost so far in dollars; shown with four decimals.

    Returns:
        Markdown text: a heading, a two-column table with one row per agent
        (Explorer, Planner, Coder, Reviewer) and a ``**Cost:**`` line.
    """
    def icon(done: bool, active: bool = False) -> str:
        if done:
            return "✅"
        if active:
            return "⏳"
        return "⬜"

    current = status['current_agent']

    def row(agent: str, done: bool, label: str) -> str:
        return f"| {agent} | {icon(done, current == agent)} {label} |"

    def stage_label(agent: str, done: bool, working: str) -> str:
        # A finished stage always reads "Done", even if it is still the
        # most recently seen agent.
        if done:
            return 'Done'
        return working if current == agent else 'Waiting'

    lines = ["## Agent Progress\n"]
    lines.append("| Agent | Status |")
    lines.append("|-------|--------|")

    for agent, done_key, working in (
        ('Explorer', 'explorer_done', 'Searching codebase...'),
        ('Planner', 'planner_done', 'Creating plan...'),
        ('Coder', 'coder_done', 'Writing code...'),
    ):
        done = status[done_key]
        lines.append(row(agent, done, stage_label(agent, done, working)))

    reviewer_done = status['reviewer_done']
    approved = status.get('approved')
    if reviewer_done:
        if approved is None:
            # The review finished without a recorded verdict; do not
            # misreport an unknown outcome as a rejection.
            reviewer_status = 'Done'
        elif approved:
            reviewer_status = '**APPROVED**'
        else:
            reviewer_status = '**REJECTED**'
    elif current == 'Reviewer':
        reviewer_status = 'Reviewing...'
    else:
        reviewer_status = 'Waiting'
    lines.append(row('Reviewer', reviewer_done, reviewer_status))

    lines.append(f"\n**Cost:** ${total_cost:.4f}")
    return "\n".join(lines)
def format_final_result(result: dict, total_cost: float) -> str:
    """Format the final result: status header, review table, cost summary.

    Args:
        result: Orchestrator result. The keys read are ``success``,
            ``plan``, ``code_changes``, ``review_feedback`` and
            ``iterations``; all are optional. ``None`` is treated as an
            empty result.
        total_cost: Total cost in dollars; shown with four decimals.

    Returns:
        Markdown text with a ``## Result`` header, a ``### Review Results``
        table and a ``### Cost Summary`` section.
    """
    result = result or {}
    lines = []

    # Status header
    success = result.get('success', False)
    status_icon = "✅" if success else "❌"
    lines.append(f"## Result: {status_icon} {'Success' if success else 'Failed'}\n")

    # Review result table
    lines.append("### Review Results\n")
    lines.append("| Check | Result |")
    lines.append("|-------|--------|")

    if result.get('plan'):
        lines.append("| Plan Created | ✅ Pass |")
    else:
        lines.append("| Plan Created | ❌ Fail |")

    code_changes = result.get('code_changes')
    if code_changes:
        count = len(code_changes)
        # Avoid the ungrammatical "1 files".
        noun = "file" if count == 1 else "files"
        lines.append(f"| Code Written | ✅ Pass ({count} {noun}) |")
    else:
        lines.append("| Code Written | ❌ Fail |")

    if not result.get('review_feedback'):
        lines.append("| Code Review | ⬜ Not Run |")
    elif success:
        lines.append("| Code Review | ✅ Approved |")
    else:
        lines.append("| Code Review | ❌ Rejected |")
    lines.append("")

    # Cost summary
    lines.append("### Cost Summary\n")
    lines.append(f"**Total Cost:** ${total_cost:.4f}")
    lines.append(f"**Iterations:** {result.get('iterations', 'N/A')}")
    return "\n".join(lines)
 @cl.on_chat_start
 async def start():
     """Initialize the agent system when chat starts."""
+    print("[CHAINLIT] on_chat_start triggered")
     await cl.Message(
         content=f"# CodePilot - Autonomous AI Coding Agent\n\n"
                 "**How to use:**\n"
                 "1. Paste a **public GitHub URL** and I'll clone and analyze it\n"
                 "2. Tell me what you want to build or fix\n"
+                "3. Watch my agents (Explorer → Planner → Coder → Reviewer) work!\n\n"
                 "**Example:**\n"
+                "```\nhttps://github.com/pallets/flask add a health check endpoint example\n```\n\n"
+                "**Ready!** Paste a GitHub URL with your task."
     ).send()
+    print("[CHAINLIT] Welcome message sent")
     # Initialize session variables
     cl.user_session.set("repo_path", None)
     cl.user_session.set("repo_info", None)
     cl.user_session.set("orchestrator", Orchestrator(max_iterations=10))
     cl.user_session.set("ready", True)
     print("[CHAINLIT] Orchestrator created, ready for GitHub repos")
 @cl.on_chat_end
 async def end():
     """Clean up when the chat ends: read ``repo_path`` from the Chainlit user session and, when one is stored, log it and pass it to ``cleanup_repository``."""
     repo_path = cl.user_session.get("repo_path")
     # Nothing to do when no repository path was stored in this session.
     if repo_path:
         print(f"[CHAINLIT] Cleaning up repo: {repo_path}")
         # NOTE(review): presumably removes the cloned checkout from disk —
         # confirm against the definition of cleanup_repository.
         cleanup_repository(repo_path)
+async def run_workflow(orchestrator, task_or_answers, is_resume=False):
+    """Run the orchestrator workflow and display clean progress."""
+    # Create progress message
+    progress_msg = cl.Message(content="## Agent Progress\n\nStarting...")
+    await progress_msg.send()
+    try:
+        captured_output = io.StringIO()
+        def run_task():
+            with redirect_stdout(captured_output), redirect_stderr(captured_output):
+                if is_resume:
+                    return orchestrator.resume_after_clarification(task_or_answers)
+                else:
+                    return orchestrator.run(task_or_answers)
+        loop = asyncio.get_event_loop()
+        executor = ThreadPoolExecutor(max_workers=1)
+        future = loop.run_in_executor(executor, run_task)
+        # Track tokens for cost calculation
+        total_prompt_tokens = 0
+        total_completion_tokens = 0
+        seen_token_lines = set()
+        # Stream progress updates
+        accumulated_logs = ""
+        while not future.done():
+            await asyncio.sleep(0.3)
+            current_output = captured_output.getvalue()
+            if current_output != accumulated_logs:
+                accumulated_logs = current_output
+                # Extract token usage
+                for line in accumulated_logs.split('\n'):
+                    if 'Tokens:' in line and line not in seen_token_lines:
+                        seen_token_lines.add(line)
+                        try:
+                            parts = line.split('Tokens:')[1].strip()
+                            prompt = int(parts.split('prompt')[0].strip())
+                            completion = int(parts.split('+')[1].split('completion')[0].strip())
+                            total_prompt_tokens += prompt
+                            total_completion_tokens += completion
+                        except:
+                            pass
+                # Calculate cost
+                total_cost = (total_prompt_tokens / 1000000) * 3.0 + (total_completion_tokens / 1000000) * 15.0
+                # Parse and display progress
+                status = parse_agent_status(accumulated_logs)
+                progress_msg.content = format_progress_display(status, total_cost)
+                await progress_msg.update()
+        # Get final result
+        result = await future
+        # Final token count
+        final_logs = captured_output.getvalue()
+        for line in final_logs.split('\n'):
+            if 'Tokens:' in line and line not in seen_token_lines:
+                seen_token_lines.add(line)
+                try:
+                    parts = line.split('Tokens:')[1].strip()
+                    prompt = int(parts.split('prompt')[0].strip())
+                    completion = int(parts.split('+')[1].split('completion')[0].strip())
+                    total_prompt_tokens += prompt
+                    total_completion_tokens += completion
+                except:
+                    pass
+        total_cost = (total_prompt_tokens / 1000000) * 3.0 + (total_completion_tokens / 1000000) * 15.0
+        # Update progress with final status
+        status = parse_agent_status(final_logs)
+        progress_msg.content = format_progress_display(status, total_cost)
+        await progress_msg.update()
+        return result, total_cost
+    except Exception as e:
+        await cl.Message(content=f"## Error\n\n```\n{str(e)}\n```").send()
+        return None, 0
 @cl.on_message
 async def main(message: cl.Message):
     """Handle user messages and run the agent workflow."""
     # Flow, as visible in this handler:
     #   1. Refuse to run until the session's "ready" flag is set.
     #   2. If the previous turn asked clarifying questions, treat this message as
     #      the answers, resume the stored orchestrator, show the result and stop.
     #   3. Otherwise start a fresh orchestrator, optionally clone/index a GitHub
     #      repository named in the message, and build a context preamble.
     #   4. Run the workflow and show either clarifying questions, the final
     #      result plus generated code, or the plan when no code was produced.
     if not cl.user_session.get("ready"):
         await cl.Message(content="System is still initializing, please wait...").send()
         return
     orchestrator: Orchestrator = cl.user_session.get("orchestrator")
+    # Handle clarification answers
     if cl.user_session.get("waiting_for_clarification"):
         # Clear the flag first so the next message is handled as a new task.
         cl.user_session.set("waiting_for_clarification", False)
         user_answers = message.content
+        await cl.Message(content="Thanks! Creating plan with your input...").send()
+        result, total_cost = await run_workflow(orchestrator, user_answers, is_resume=True)
         # NOTE(review): unlike the new-task path below, this branch does not check
         # result['status'] for a further round of clarifying questions — confirm intended.
+        if result:
+            # Show final result
+            await cl.Message(content=format_final_result(result, total_cost)).send()
+            # Show code if any
             if result.get('code_changes'):
+                await cl.Message(content="## Generated Code\n\n" + format_code_output(result['code_changes'])).send()
+        return
+    # New task - reset orchestrator
+    orchestrator = Orchestrator(max_iterations=10)
+    cl.user_session.set("orchestrator", orchestrator)
+    print("[CHAINLIT] Created fresh orchestrator for new task")
+    # Check for GitHub URL
     github_url = extract_github_url(message.content)
     # Stays empty unless repository context is built below.
     task_context = ""
     if github_url:
+        clone_msg = await cl.Message(content=f"📦 Cloning `{github_url}`...").send()
         # NOTE(review): this diff view elides at least one line after the clone call
         # (presumably the success check that pairs with the `else:` further down) — confirm
         # against the full file. On the path shown, `result` is used as the local repo path;
         # on the failure path it is shown to the user as the error text.
         success, result, repo_name = clone_repository(github_url)
             repo_path = result
             repo_info = get_repo_info(repo_path)
             # Remember the repository so later messages and the chat-end hook can find it.
             cl.user_session.set("repo_path", repo_path)
             cl.user_session.set("repo_info", repo_info)
+            # Index repository
             # Indexing is best-effort: a failure is printed and the task continues.
             try:
+                index_codebase(repo_path)
             except Exception as e:
+                print(f"[CHAINLIT] Indexing failed: {e}")
             # At most the first five languages are listed; "Unknown" when the list is empty.
             languages = ", ".join(repo_info["languages"][:5]) if repo_info["languages"] else "Unknown"
             task_context = f"""
 [REPOSITORY CONTEXT]
 Path: {repo_path}
 Total Files: {repo_info['total_files']}
 Languages: {languages}
 AVAILABLE TOOLS:
+- search_repository: Search using BM25
+- read_file: Read file (use full path: {repo_path}/...)
+- search_code: Grep for patterns
 """
+            clone_msg.content = f"✅ **Cloned:** {repo_name} ({repo_info['total_files']} files, {languages})"
             await clone_msg.update()
         else:
+            clone_msg.content = f"❌ **Failed to clone:** {result}"
             await clone_msg.update()
             return
     elif cl.user_session.get("repo_path"):
         # No URL in this message: fall back to the repository stored in the session.
         # NOTE(review): the diff view elides lines after the two lookups below (the
         # start of the context f-string that the following text belongs to) — confirm
         # against the full file.
         repo_path = cl.user_session.get("repo_path")
         repo_info = cl.user_session.get("repo_info")
 Languages: {languages}
 AVAILABLE TOOLS:
+- search_repository: Search using BM25
+- read_file: Read file (use full path: {repo_path}/...)
+- search_code: Grep for patterns
 """
+    # Extract user query (remove URL if present)
     user_query = message.content
     if github_url:
         user_query = re.sub(r'https?://github\.com/[^\s]+', '', user_query).strip()
     # Prefix the repository context when there is one; otherwise send the query alone.
     full_task = task_context + "\n\n" + user_query if task_context else user_query
+    # Run workflow
+    result, total_cost = await run_workflow(orchestrator, full_task, is_resume=False)
     # A falsy result ends the turn with nothing further sent from here.
+    if not result:
+        return
+    # Check if clarification needed
+    if result.get('status') == 'clarifying' and result.get('clarifying_questions'):
         # The next message will be routed to the clarification branch at the top.
+        cl.user_session.set("waiting_for_clarification", True)
+        await cl.Message(
+            content=f"## Before I proceed, I have some questions:\n\n{result['clarifying_questions']}\n\n**Please answer above to continue.**"
+        ).send()
+        return
+    # Show final result
+    await cl.Message(content=format_final_result(result, total_cost)).send()
+    # Show generated code
+    if result.get('code_changes'):
+        await cl.Message(content="## Generated Code\n\n" + format_code_output(result['code_changes'])).send()
+    # Show plan if available
     # The plan is only shown when no code was generated.
+    if result.get('plan') and not result.get('code_changes'):
+        await cl.Message(content=f"## Plan\n\n{result['plan']}").send()
 # Direct execution is not supported; exit with the launcher command as the message.
 if __name__ == "__main__":
     raise SystemExit("Run with: chainlit run chainlit_app.py")