#!/usr/bin/env python3
"""
Shinka Visualization Module
This module provides visualization capabilities for Shinka evolution results.
It serves a web interface for exploring evolution databases and meta files.
"""
import argparse
import base64
import http.server
import json
import markdown
import os
import re
import socketserver
import sqlite3
import subprocess
import sys
import tempfile
import threading
import time
import urllib.parse
import webbrowser
from pathlib import Path
from typing import Optional, Dict, Any, Tuple
from shinka.database import DatabaseConfig, ProgramDatabase
# We'll use a simple text-to-PDF approach instead of complex dependencies,
# so the WeasyPrint code path is hard-disabled here.
WEASYPRINT_AVAILABLE = False
# Default port number; presumably the port the server listens on when none is
# given (the code that consumes it is not in this part of the file).
DEFAULT_PORT = 8000
CACHE_EXPIRATION_SECONDS = 5 # Cache data for 5 seconds
# Response cache used by handle_get_programs: maps the db_path string sent by
# the client to (time.time() at fetch, list of program dicts).
db_cache: Dict[str, Tuple[float, Any]] = {}
class DatabaseRequestHandler(http.server.SimpleHTTPRequestHandler):
def __init__(self, *args, search_root=None, **kwargs):
    """Record the directory to scan, then hand over to the base handler.

    Args:
        *args: Positional arguments forwarded unchanged to the parent class.
        search_root: Directory that is searched for database files. Any
            falsy value (``None``, ``""``) selects the current working
            directory.
        **kwargs: Keyword arguments forwarded unchanged to the parent class.
    """
    # The attribute is assigned before delegating so that it is already set
    # while the parent constructor runs.
    if search_root:
        self.search_root = search_root
    else:
        self.search_root = os.getcwd()
    super().__init__(*args, **kwargs)
def log_message(self, format, *args):
    """Print a log line to stdout with a ``[SERVER]`` prefix.

    Args:
        format: %-style format string.
        *args: Values interpolated into ``format``.
    """
    rendered = format % args
    # Leading newline keeps each entry visually separated on the console.
    print("\n[SERVER] " + rendered)
def do_GET(self):
    """Dispatch a GET request to an API handler or to static file serving.

    ``/list_databases`` is always handled. The other API endpoints are only
    taken when every query parameter they need is present; a request that
    matches an endpoint path but lacks a parameter falls through to static
    file serving, as does any other path. ``/`` is rewritten to
    ``/viz_tree.html`` before the static handler runs.

    Returns:
        Whatever the selected handler returns.
    """
    print(f"\n[SERVER] Received GET request for: {self.path}")
    url = urllib.parse.urlparse(self.path)
    endpoint = url.path
    params = urllib.parse.parse_qs(url.query)

    if endpoint == "/list_databases":
        return self.handle_list_databases()

    # endpoint -> (required query parameters in call order, handler name)
    api_routes = {
        "/get_programs": (("db_path",), "handle_get_programs"),
        "/get_meta_files": (("db_path",), "handle_get_meta_files"),
        "/get_meta_content": (
            ("db_path", "generation"),
            "handle_get_meta_content",
        ),
        "/download_meta_pdf": (
            ("db_path", "generation"),
            "handle_download_meta_pdf",
        ),
    }
    route = api_routes.get(endpoint)
    if route is not None:
        required, handler_name = route
        if all(key in params for key in required):
            # Only the first value of a repeated parameter is used.
            arguments = [params[key][0] for key in required]
            return getattr(self, handler_name)(*arguments)

    if endpoint == "/":
        print("[SERVER] Root path requested, serving viz_tree.html")
        self.path = "/viz_tree.html"

    # Serve static files from the webui directory
    return http.server.SimpleHTTPRequestHandler.do_GET(self)
def handle_list_databases(self):
    """Scan the search root for database files and send the list as JSON.

    Every ``.db`` / ``.sqlite`` file found under ``self.search_root``
    (searched recursively) becomes one entry with the keys:

    * ``path``: path shown to the client. When the real relative path has
      fewer than three components, the search-root directory name is
      prepended.
    * ``name``: ``"<file stem> - <relative parent directory>"``.
    * ``actual_path``: the real path relative to the search root.

    Entries are ordered newest first by a ``_YYYYMMDD_HHMMSS`` stamp found
    in the relative path; paths without a stamp sort last.

    Relative paths are always emitted with forward slashes. The component
    count and the task-name prefix are both expressed with ``/``, so using
    the platform separator would miscount components wherever it is not
    ``/``.
    """
    print(
        f"[SERVER] Received request for database list, "
        f"searching in: {self.search_root}"
    )
    date_pattern = re.compile(r"_(\d{8}_\d{6})")
    # Get the task name from the search root directory name
    task_name = os.path.basename(self.search_root)
    keyed_entries = []  # (sort_key, entry) pairs

    if os.path.exists(self.search_root):
        print(f"[SERVER] Scanning for .db files in: {self.search_root}")
        for root, _, files in os.walk(self.search_root):
            for filename in files:
                if not filename.lower().endswith((".db", ".sqlite")):
                    continue
                full_path = os.path.join(root, filename)
                relative = Path(os.path.relpath(full_path, self.search_root))
                # Normalise to "/" so component counting below is portable.
                client_path = relative.as_posix()
                display_name = (
                    f"{Path(filename).stem} - {relative.parent.as_posix()}"
                )

                # Extract date for sorting; "0" sorts after any real stamp
                # once the list is reversed.
                match = date_pattern.search(client_path)
                sort_key = match.group(1) if match else "0"

                # Paths with fewer than three components get the task name
                # prepended for the client's organisation of entries.
                if len(client_path.split("/")) < 3:
                    modified_client_path = f"{task_name}/{client_path}"
                else:
                    modified_client_path = client_path

                entry = {
                    "path": modified_client_path,
                    "name": display_name,
                    # Real relative path, kept for file operations.
                    "actual_path": client_path,
                }
                keyed_entries.append((sort_key, entry))
                print(
                    f"[SERVER] Found DB: {client_path} -> {modified_client_path} (sort: {sort_key})"
                )

    if not keyed_entries:
        print("[SERVER] No database files found in search directory.")

    # Sort databases by the extracted date, newest first. The sort key is
    # never part of the entry, so nothing needs stripping before sending.
    keyed_entries.sort(key=lambda pair: pair[0], reverse=True)
    db_files = [entry for _, entry in keyed_entries]
    self.send_json_response(db_files)
    print(f"[SERVER] Served DB list with {len(db_files)} entries, sorted by date.")
def _get_actual_db_path(self, db_path: str) -> str:
    """Convert a potentially modified db_path back to the actual file path.

    The database listing prepends ``<task_name>/`` (the search root's
    directory name) to relative paths with fewer than three components.
    This method undoes that prefix.

    A genuine relative path can also start with a directory that happens to
    share the task name, so the prefix is NOT stripped when the path exists
    as given under the search root and either

    * it has more than three components (the listing never produces a
      prefixed path that long), or
    * the stripped variant does not exist.

    In every other case the prefix is stripped, as before.

    Args:
        db_path: Path received from the client, ``/``-separated.

    Returns:
        The path relative to ``self.search_root``.
    """
    task_name = os.path.basename(self.search_root)
    prefix = f"{task_name}/"
    if not db_path.startswith(prefix):
        return db_path

    stripped = db_path[len(prefix):]
    if os.path.isabs(db_path):
        # Only reachable when the task name is empty (search root ends with
        # a separator). Never probe an absolute client path on disk.
        return stripped

    literal_exists = os.path.exists(os.path.join(self.search_root, db_path))
    if literal_exists:
        if len(db_path.split("/")) > 3:
            return db_path
        if not os.path.exists(os.path.join(self.search_root, stripped)):
            return db_path
    return stripped
def handle_get_programs(self, db_path: str):
    """Fetch all programs from a given database file and send them as JSON.

    Steps:

    1. Resolve ``db_path`` relative to ``self.search_root`` and reject it
       with 403 if it points outside the search root (``db_path`` comes
       straight from the query string).
    2. Serve from ``db_cache`` if the entry is younger than
       ``CACHE_EXPIRATION_SECONDS``.
    3. Otherwise open the database read-only and load every program,
       retrying with backoff while SQLite reports it as locked/busy.

    Responses: 200 with a JSON list of program dicts, 403 for a path
    outside the search root, 404 if the file does not exist, 503 if the
    database is still busy after all retries, 500 for any other error.

    Args:
        db_path: Database path as sent by the client, possibly carrying the
            task-name prefix added by the database listing.
    """
    print(f"[SERVER] Fetching programs from DB: {db_path}")

    # Handle the case where db_path might have the task name prepended
    actual_db_path = self._get_actual_db_path(db_path)
    # Construct absolute path to the database from search root using actual path
    abs_db_path = os.path.join(self.search_root, actual_db_path)

    # Lexical containment check (no symlink resolution): "../" segments or
    # an absolute db_path must not escape the search root.
    search_root_abs = os.path.abspath(self.search_root)
    try:
        inside_root = (
            os.path.commonpath([search_root_abs, os.path.abspath(abs_db_path)])
            == search_root_abs
        )
    except ValueError:
        # commonpath refuses to compare e.g. paths on different drives.
        inside_root = False
    if not inside_root:
        print(f"[SERVER] Rejected DB path outside search root: {db_path}")
        self.send_error(403, "Database path is outside the search root")
        return

    # Check cache first
    if db_path in db_cache:
        last_fetch_time, cached_data = db_cache[db_path]
        if time.time() - last_fetch_time < CACHE_EXPIRATION_SECONDS:
            print(f"[SERVER] Serving from cache for DB: {db_path}")
            self.send_json_response(cached_data)
            return

    print(f"[SERVER] Absolute DB path: {abs_db_path} (from {db_path})")
    if not os.path.exists(abs_db_path):
        self.send_error(404, f"Database file not found: {actual_db_path}")
        return

    # Retry logic for the reader with improved WAL mode support
    max_retries = 5  # Increased retries for better resilience
    delay = 0.1  # Shorter initial delay
    for attempt in range(max_retries):
        db = None
        try:
            config = DatabaseConfig(db_path=abs_db_path)
            db = ProgramDatabase(config, read_only=True)
            # Set WAL mode compatible settings for read-only connections
            if db.cursor:
                db.cursor.execute(
                    "PRAGMA busy_timeout = 10000;"
                )  # 10 second timeout
                # NOTE(review): requesting a journal-mode change on a
                # read-only connection may be a no-op or an error depending
                # on the file's current mode - confirm this is intended.
                db.cursor.execute("PRAGMA journal_mode = WAL;")  # Ensure WAL mode
            programs = db.get_all_programs()
            # Convert Program objects to dicts for JSON
            programs_dict = [p.to_dict() for p in programs]
            # Update cache
            db_cache[db_path] = (time.time(), programs_dict)
            self.send_json_response(programs_dict)
            success_msg = (
                f"[SERVER] Successfully served {len(programs)} "
                f"programs from {db_path} (attempt {attempt + 1})"
            )
            print(success_msg)
            return  # Success, exit the retry loop
        except (sqlite3.OperationalError, sqlite3.DatabaseError) as e:
            error_str = str(e).lower()
            if "database is locked" in error_str or "busy" in error_str:
                print(
                    f"[SERVER] Attempt {attempt + 1}/{max_retries} - database busy, "
                    f"retrying in {delay:.1f}s... ({e})"
                )
                if attempt < max_retries - 1:
                    time.sleep(delay)
                    delay = min(delay * 1.5, 2.0)  # Exponential backoff, max 2s
                    continue
            else:
                print(f"[SERVER] Non-recoverable database error: {e}")
                self.send_error(500, f"Database error: {str(e)}")
                return
            # Last retry failed
            if attempt == max_retries - 1:
                err_msg = (
                    f"[SERVER] Database still busy after {max_retries} attempts"
                )
                print(err_msg)
                self.send_error(
                    503,
                    "Database temporarily unavailable - evolution may be running",
                )
        except Exception as e:
            # Catch any other unexpected errors
            print(f"[SERVER] An unexpected error occurred: {e}")
            self.send_error(500, f"An unexpected error occurred: {str(e)}")
            return  # Don't retry on unknown errors
        finally:
            # Ensure database connection is properly closed, whichever way
            # this attempt ended (success, retry, or error).
            if db and hasattr(db, "close"):
                try:
                    db.close()
                except Exception as e:
                    print(f"[SERVER] Warning: Error closing database: {e}")
def handle_get_meta_files(self, db_path: str):
    """List available meta_{gen}.txt files for a given database.

    Looks in the directory that contains the database file and sends a JSON
    list, sorted by generation, of ``{"generation", "filename", "path"}``
    dicts. Files whose ``{gen}`` part is not an integer are skipped.

    Responses: 200 with the list, 403 if the directory lies outside the
    search root (``db_path`` comes straight from the query string), 404 if
    the directory does not exist, 500 if listing fails.

    Args:
        db_path: Database path as sent by the client, possibly carrying the
            task-name prefix added by the database listing.
    """
    print(f"[SERVER] Listing meta files for DB: {db_path}")

    # Get the actual database path, then the directory containing it
    actual_db_path = self._get_actual_db_path(db_path)
    abs_db_path = os.path.join(self.search_root, actual_db_path)
    db_dir = os.path.dirname(abs_db_path)

    # Lexical containment check (no symlink resolution): "../" segments or
    # an absolute db_path must not let the client list other directories.
    search_root_abs = os.path.abspath(self.search_root)
    try:
        inside_root = (
            os.path.commonpath([search_root_abs, os.path.abspath(db_dir)])
            == search_root_abs
        )
    except ValueError:
        # commonpath refuses to compare e.g. paths on different drives.
        inside_root = False
    if not inside_root:
        print(f"[SERVER] Rejected DB path outside search root: {db_path}")
        self.send_error(403, "Database path is outside the search root")
        return

    if not os.path.exists(db_dir):
        self.send_error(404, f"Database directory not found: {db_dir}")
        return

    meta_files = []
    try:
        # Look for meta_{gen}.txt files in the same directory as the DB
        for entry in os.listdir(db_dir):
            if not (entry.startswith("meta_") and entry.endswith(".txt")):
                continue
            gen_str = entry[5:-4]  # Remove 'meta_' and '.txt'
            try:
                generation = int(gen_str)
            except ValueError:
                # Skip files that don't have valid generation numbers
                continue
            meta_files.append(
                {
                    "generation": generation,
                    "filename": entry,
                    "path": os.path.join(db_dir, entry),
                }
            )
        # Sort by generation number
        meta_files.sort(key=lambda item: item["generation"])
        print(f"[SERVER] Found {len(meta_files)} meta files")
        self.send_json_response(meta_files)
    except Exception as e:
        # Request boundary: report the failure to the client instead of
        # letting it escape the handler.
        print(f"[SERVER] Error listing meta files: {e}")
        self.send_error(500, f"Error listing meta files: {str(e)}")
def handle_get_meta_content(self, db_path: str, generation: str):
    """Get the content of a specific meta_{gen}.txt file.

    Sends ``{"generation": int, "filename": str, "content": str}`` as JSON.

    Both arguments come straight from the query string, so they are checked
    before any file is touched: ``generation`` must parse as an integer (it
    is interpolated into a filename), and the database directory must lie
    inside the search root.

    Responses: 200 with the JSON document, 400 for a non-integer
    generation, 403 for a path outside the search root, 404 if the meta
    file does not exist, 500 if reading fails.

    Args:
        db_path: Database path as sent by the client, possibly carrying the
            task-name prefix added by the database listing.
        generation: Generation number as text.
    """
    print(
        f"[SERVER] Fetching meta content for DB: {db_path}, "
        f"generation: {generation}"
    )

    # Validate up front: a non-integer value would otherwise only surface
    # after the file had been opened and read, and be reported as a 500.
    try:
        generation_number = int(generation)
    except ValueError:
        self.send_error(400, "Generation must be an integer")
        return

    # Get the actual database path, then the directory containing it
    actual_db_path = self._get_actual_db_path(db_path)
    abs_db_path = os.path.join(self.search_root, actual_db_path)
    db_dir = os.path.dirname(abs_db_path)

    # Lexical containment check (no symlink resolution): "../" segments or
    # an absolute db_path must not let the client read files elsewhere.
    search_root_abs = os.path.abspath(self.search_root)
    try:
        inside_root = (
            os.path.commonpath([search_root_abs, os.path.abspath(db_dir)])
            == search_root_abs
        )
    except ValueError:
        # commonpath refuses to compare e.g. paths on different drives.
        inside_root = False
    if not inside_root:
        print(f"[SERVER] Rejected DB path outside search root: {db_path}")
        self.send_error(403, "Database path is outside the search root")
        return

    # Construct the meta file path (the text as received, so that e.g.
    # zero-padded names keep resolving to the same file as before).
    meta_filename = f"meta_{generation}.txt"
    meta_file_path = os.path.join(db_dir, meta_filename)
    if not os.path.exists(meta_file_path):
        self.send_error(404, f"Meta file not found: {meta_filename}")
        return

    try:
        with open(meta_file_path, "r", encoding="utf-8") as f:
            content = f.read()
        response_data = {
            "generation": generation_number,
            "filename": meta_filename,
            "content": content,
        }
        print(
            f"[SERVER] Successfully served meta content for generation {generation}"
        )
        self.send_json_response(response_data)
    except Exception as e:
        # Request boundary: report the failure to the client instead of
        # letting it escape the handler.
        print(f"[SERVER] Error reading meta file: {e}")
        self.send_error(500, f"Error reading meta file: {str(e)}")
def handle_download_meta_pdf(self, db_path: str, generation: str):
    """Convert a specific meta_{gen}.txt file to PDF and serve it.

    The file content is passed to ``self._generate_pdf``; the result is
    sent as an ``application/pdf`` attachment named ``meta_{gen}.pdf``.

    Both arguments come straight from the query string, so they are checked
    before any file is touched: ``generation`` must parse as an integer (it
    is interpolated into a filename and into the ``Content-Disposition``
    header), and the database directory must lie inside the search root.

    Responses: 200 with the attachment, 400 for a non-integer generation,
    403 for a path outside the search root, 404 if the meta file does not
    exist, 500 if reading or conversion fails.

    Args:
        db_path: Database path as sent by the client, possibly carrying the
            task-name prefix added by the database listing.
        generation: Generation number as text.
    """
    print(
        f"[SERVER] PDF download request for DB: {db_path}, generation: {generation}"
    )

    # Reject anything that is not an integer before it reaches the
    # filesystem path or the response header below.
    try:
        int(generation)
    except ValueError:
        self.send_error(400, "Generation must be an integer")
        return

    # Get the actual database path, then the directory containing it
    actual_db_path = self._get_actual_db_path(db_path)
    abs_db_path = os.path.join(self.search_root, actual_db_path)
    db_dir = os.path.dirname(abs_db_path)

    # Lexical containment check (no symlink resolution): "../" segments or
    # an absolute db_path must not let the client read files elsewhere.
    search_root_abs = os.path.abspath(self.search_root)
    try:
        inside_root = (
            os.path.commonpath([search_root_abs, os.path.abspath(db_dir)])
            == search_root_abs
        )
    except ValueError:
        # commonpath refuses to compare e.g. paths on different drives.
        inside_root = False
    if not inside_root:
        print(f"[SERVER] Rejected DB path outside search root: {db_path}")
        self.send_error(403, "Database path is outside the search root")
        return

    # Construct the meta file path
    meta_filename = f"meta_{generation}.txt"
    meta_file_path = os.path.join(db_dir, meta_filename)
    if not os.path.exists(meta_file_path):
        self.send_error(404, f"Meta file not found: {meta_filename}")
        return

    try:
        with open(meta_file_path, "r", encoding="utf-8") as f:
            content = f.read()
        pdf_filename = f"meta_{generation}.pdf"

        # Try to generate PDF using available methods
        pdf_bytes = self._generate_pdf(content, generation)
        if pdf_bytes is None:
            print("[SERVER] All PDF generation methods failed, serving text")
            # Fall back to serving formatted text with PDF headers.
            # NOTE(review): the payload is then plain UTF-8 text labelled
            # application/pdf with a .pdf filename, so PDF viewers will not
            # open it. Kept as-is because clients may rely on the headers;
            # consider text/plain and a .txt filename instead.
            formatted_content = (
                f"Meta Generation {generation}\n{'=' * 50}\n\n{content}"
            )
            pdf_bytes = formatted_content.encode("utf-8")

        self.send_response(200)
        self.send_header("Content-Type", "application/pdf")
        self.send_header(
            "Content-Disposition", f'attachment; filename="{pdf_filename}"'
        )
        self.send_header("Content-Length", str(len(pdf_bytes)))
        self.end_headers()
        self.wfile.write(pdf_bytes)
        print(f"[SERVER] Successfully served PDF: {pdf_filename}")
    except Exception as e:
        # Request boundary: report the failure to the client instead of
        # letting it escape the handler.
        print(f"[SERVER] Error converting meta file to PDF: {e}")
        self.send_error(500, f"Error converting to PDF: {str(e)}")
def _generate_pdf(self, content: str, generation: str) -> bytes:
"""Generate PDF from markdown content using available methods."""
print(f"[SERVER] Attempting to generate PDF for generation {generation}")
# Method 1: Try simple HTML to PDF using browser print
try:
# Preprocess content to fix line break issues
processed_content = self._fix_line_breaks(content)
# Convert markdown to HTML with better line break handling
try:
html_content = markdown.markdown(
processed_content,
extensions=["extra", "nl2br"], # nl2br: newlines to
)
except Exception:
# Fallback if nl2br extension is not available
html_content = markdown.markdown(
processed_content, extensions=["extra"]
)
# Manually convert remaining single line breaks to
html_content = html_content.replace("\n", "
\n")
# Add boxes around program summaries after markdown conversion
print(
f"[SERVER] HTML content before boxing (first 500 chars): "
f"{html_content[:500]}"
)
html_content = self._add_program_boxes_html(html_content)
print(
f"[SERVER] HTML content after boxing (first 500 chars): "
f"{html_content[:500]}"
)
# Get the logo as base64
logo_data_uri = self._get_logo_base64()
# Create a well-formatted HTML document
html_full = f"""
tags that contain program summaries # Pattern matches
tags that start with Program Name:
program_pattern = r"( Program Name:[^<]*[\s\S]*?