Spaces:
Build error
Build error
| -- Hybrid search function for enhanced search capabilities | |
| CREATE OR REPLACE FUNCTION hybrid_match_documents( | |
| query_embedding vector(1536), | |
| match_count int, | |
| metadata_filter jsonb DEFAULT '{}', | |
| query_text text DEFAULT '' | |
| ) RETURNS TABLE ( | |
| id uuid, | |
| node_id text, | |
| text text, | |
| metadata_ jsonb, | |
| similarity float, | |
| source text | |
| ) AS $$ | |
| BEGIN | |
| RETURN QUERY | |
| SELECT | |
| kb.id, | |
| kb.node_id, | |
| kb.text, | |
| kb.metadata_, | |
| 1 - (kb.embedding <=> query_embedding) AS similarity, | |
| COALESCE(kb.metadata_->>'source', 'unknown') AS source | |
| FROM knowledge_base kb | |
| WHERE | |
| CASE | |
| WHEN metadata_filter = '{}' THEN true | |
| ELSE kb.metadata_ @> metadata_filter | |
| END | |
| ORDER BY kb.embedding <=> query_embedding | |
| LIMIT match_count; | |
| END; | |
| $$ LANGUAGE plpgsql; | |
| -- Basic vector similarity search function (fallback) | |
| CREATE OR REPLACE FUNCTION match_documents( | |
| query_embedding vector(1536), | |
| match_count int | |
| ) RETURNS TABLE ( | |
| id uuid, | |
| node_id text, | |
| text text, | |
| metadata_ jsonb, | |
| similarity float | |
| ) AS $$ | |
| BEGIN | |
| RETURN QUERY | |
| SELECT | |
| kb.id, | |
| kb.node_id, | |
| kb.text, | |
| kb.metadata_, | |
| 1 - (kb.embedding <=> query_embedding) AS similarity | |
| FROM knowledge_base kb | |
| ORDER BY kb.embedding <=> query_embedding | |
| LIMIT match_count; | |
| END; | |
| $$ LANGUAGE plpgsql; | |
| -- Knowledge base table creation | |
| CREATE TABLE IF NOT EXISTS knowledge_base ( | |
| id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), | |
| node_id TEXT UNIQUE NOT NULL, | |
| embedding VECTOR(1536) NOT NULL, | |
| text TEXT, | |
| metadata_ JSONB DEFAULT '{}'::jsonb, | |
| -- Link to your lifecycle tracking | |
| lifecycle_id TEXT REFERENCES knowledge_lifecycle(document_id), | |
| -- Performance tracking | |
| access_count INTEGER DEFAULT 0, | |
| last_accessed_at TIMESTAMP WITH TIME ZONE, | |
| created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), | |
| updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() | |
| ); | |
| -- Create index for vector similarity search | |
| CREATE INDEX IF NOT EXISTS knowledge_base_embedding_idx | |
| ON knowledge_base USING ivfflat (embedding vector_cosine_ops) | |
| WITH (lists = 100); | |
| -- Create index for metadata filtering | |
| CREATE INDEX IF NOT EXISTS knowledge_base_metadata_idx | |
| ON knowledge_base USING gin (metadata_); | |
| -- Create index for node_id lookups | |
| CREATE INDEX IF NOT EXISTS knowledge_base_node_id_idx | |
| ON knowledge_base (node_id); | |
| -- Create the optimized HNSW index | |
| CREATE INDEX idx_kb_embedding_hnsw ON knowledge_base | |
| USING hnsw (embedding vector_cosine_ops) | |
| WITH (m = 24, ef_construction = 200); | |
| -- Trigger to update updated_at timestamp | |
| CREATE OR REPLACE FUNCTION update_updated_at_column() | |
| RETURNS TRIGGER AS $$ | |
| BEGIN | |
| NEW.updated_at = now(); | |
| RETURN NEW; | |
| END; | |
| $$ language 'plpgsql'; | |
| CREATE TRIGGER update_knowledge_base_updated_at | |
| BEFORE UPDATE ON knowledge_base | |
| FOR EACH ROW | |
| EXECUTE FUNCTION update_updated_at_column(); | |
| -- Supabase SQL Functions for AI Agent System | |
| -- These functions provide the missing RPC endpoints referenced in database_enhanced.py | |
| -- Enable vector extension if not already enabled | |
| CREATE EXTENSION IF NOT EXISTS vector; | |
| -- Create knowledge_base table if it doesn't exist | |
| CREATE TABLE IF NOT EXISTS knowledge_base ( | |
| id BIGSERIAL PRIMARY KEY, | |
| node_id TEXT UNIQUE NOT NULL, | |
| text TEXT NOT NULL, | |
| embedding vector(1536), -- OpenAI embedding dimension | |
| metadata_ JSONB DEFAULT '{}', | |
| source TEXT DEFAULT 'unknown', | |
| created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), | |
| updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() | |
| ); | |
| -- Create indexes for better performance | |
| CREATE INDEX IF NOT EXISTS knowledge_base_embedding_idx ON knowledge_base USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); | |
| CREATE INDEX IF NOT EXISTS knowledge_base_text_idx ON knowledge_base USING gin(to_tsvector('english', text)); | |
| CREATE INDEX IF NOT EXISTS knowledge_base_metadata_idx ON knowledge_base USING gin(metadata_); | |
| -- Function for simple vector similarity search | |
| CREATE OR REPLACE FUNCTION match_documents( | |
| query_embedding vector(1536), | |
| match_count int DEFAULT 5, | |
| filter_metadata jsonb DEFAULT '{}' | |
| ) | |
| RETURNS TABLE ( | |
| id bigint, | |
| text text, | |
| metadata_ jsonb, | |
| similarity float | |
| ) | |
| LANGUAGE plpgsql | |
| AS $$ | |
| BEGIN | |
| RETURN QUERY | |
| SELECT | |
| kb.id, | |
| kb.text, | |
| kb.metadata_, | |
| 1 - (kb.embedding <=> query_embedding) as similarity | |
| FROM knowledge_base kb | |
| WHERE kb.embedding IS NOT NULL | |
| AND (filter_metadata = '{}' OR kb.metadata_ @> filter_metadata) | |
| ORDER BY kb.embedding <=> query_embedding | |
| LIMIT match_count; | |
| END; | |
| $$; | |
| -- Function for hybrid search combining vector similarity and text search | |
| CREATE OR REPLACE FUNCTION hybrid_match_documents( | |
| query_embedding vector(1536), | |
| query_text text, | |
| match_count int DEFAULT 5, | |
| metadata_filter jsonb DEFAULT '{}' | |
| ) | |
| RETURNS TABLE ( | |
| id bigint, | |
| text text, | |
| metadata_ jsonb, | |
| similarity float, | |
| source text | |
| ) | |
| LANGUAGE plpgsql | |
| AS $$ | |
| DECLARE | |
| vector_weight float := 0.7; | |
| text_weight float := 0.3; | |
| BEGIN | |
| RETURN QUERY | |
| SELECT | |
| kb.id, | |
| kb.text, | |
| kb.metadata_, | |
| (vector_weight * (1 - (kb.embedding <=> query_embedding)) + | |
| text_weight * ts_rank(to_tsvector('english', kb.text), plainto_tsquery('english', query_text))) as similarity, | |
| kb.source | |
| FROM knowledge_base kb | |
| WHERE kb.embedding IS NOT NULL | |
| AND (metadata_filter = '{}' OR kb.metadata_ @> metadata_filter) | |
| AND to_tsvector('english', kb.text) @@ plainto_tsquery('english', query_text) | |
| ORDER BY similarity DESC | |
| LIMIT match_count; | |
| END; | |
| $$; | |
| -- Function for BM25-style text search | |
| CREATE OR REPLACE FUNCTION bm25_search( | |
| query_text text, | |
| match_count int DEFAULT 5, | |
| metadata_filter jsonb DEFAULT '{}' | |
| ) | |
| RETURNS TABLE ( | |
| id bigint, | |
| text text, | |
| metadata_ jsonb, | |
| score float, | |
| source text | |
| ) | |
| LANGUAGE plpgsql | |
| AS $$ | |
| BEGIN | |
| RETURN QUERY | |
| SELECT | |
| kb.id, | |
| kb.text, | |
| kb.metadata_, | |
| ts_rank_cd(to_tsvector('english', kb.text), plainto_tsquery('english', query_text)) as score, | |
| kb.source | |
| FROM knowledge_base kb | |
| WHERE (metadata_filter = '{}' OR kb.metadata_ @> metadata_filter) | |
| AND to_tsvector('english', kb.text) @@ plainto_tsquery('english', query_text) | |
| ORDER BY score DESC | |
| LIMIT match_count; | |
| END; | |
| $$; | |
| -- Function to insert or update documents with embeddings | |
| CREATE OR REPLACE FUNCTION upsert_document( | |
| p_node_id text, | |
| p_text text, | |
| p_embedding vector(1536), | |
| p_metadata jsonb DEFAULT '{}', | |
| p_source text DEFAULT 'unknown' | |
| ) | |
| RETURNS bigint | |
| LANGUAGE plpgsql | |
| AS $$ | |
| DECLARE | |
| doc_id bigint; | |
| BEGIN | |
| INSERT INTO knowledge_base (node_id, text, embedding, metadata_, source) | |
| VALUES (p_node_id, p_text, p_embedding, p_metadata, p_source) | |
| ON CONFLICT (node_id) | |
| DO UPDATE SET | |
| text = EXCLUDED.text, | |
| embedding = EXCLUDED.embedding, | |
| metadata_ = EXCLUDED.metadata_, | |
| source = EXCLUDED.source, | |
| updated_at = NOW() | |
| RETURNING id INTO doc_id; | |
| RETURN doc_id; | |
| END; | |
| $$; | |
| -- Function to batch insert documents | |
| CREATE OR REPLACE FUNCTION batch_insert_documents( | |
| documents jsonb | |
| ) | |
| RETURNS int | |
| LANGUAGE plpgsql | |
| AS $$ | |
| DECLARE | |
| doc jsonb; | |
| inserted_count int := 0; | |
| BEGIN | |
| FOR doc IN SELECT * FROM jsonb_array_elements(documents) | |
| LOOP | |
| INSERT INTO knowledge_base ( | |
| node_id, | |
| text, | |
| embedding, | |
| metadata_, | |
| source | |
| ) | |
| VALUES ( | |
| (doc->>'node_id')::text, | |
| (doc->>'text')::text, | |
| (doc->>'embedding')::vector(1536), | |
| COALESCE(doc->'metadata_', '{}'::jsonb), | |
| COALESCE(doc->>'source', 'unknown') | |
| ) | |
| ON CONFLICT (node_id) | |
| DO UPDATE SET | |
| text = EXCLUDED.text, | |
| embedding = EXCLUDED.embedding, | |
| metadata_ = EXCLUDED.metadata_, | |
| source = EXCLUDED.source, | |
| updated_at = NOW(); | |
| inserted_count := inserted_count + 1; | |
| END LOOP; | |
| RETURN inserted_count; | |
| END; | |
| $$; | |
| -- Function to get document statistics | |
| CREATE OR REPLACE FUNCTION get_document_stats() | |
| RETURNS TABLE ( | |
| total_documents bigint, | |
| total_embeddings bigint, | |
| avg_text_length float, | |
| sources text[] | |
| ) | |
| LANGUAGE plpgsql | |
| AS $$ | |
| BEGIN | |
| RETURN QUERY | |
| SELECT | |
| COUNT(*) as total_documents, | |
| COUNT(embedding) as total_embeddings, | |
| AVG(LENGTH(text)) as avg_text_length, | |
| ARRAY_AGG(DISTINCT source) as sources | |
| FROM knowledge_base; | |
| END; | |
| $$; | |
| -- Function to search by metadata only | |
| CREATE OR REPLACE FUNCTION search_by_metadata( | |
| metadata_filter jsonb, | |
| match_count int DEFAULT 10 | |
| ) | |
| RETURNS TABLE ( | |
| id bigint, | |
| text text, | |
| metadata_ jsonb, | |
| source text | |
| ) | |
| LANGUAGE plpgsql | |
| AS $$ | |
| BEGIN | |
| RETURN QUERY | |
| SELECT | |
| kb.id, | |
| kb.text, | |
| kb.metadata_, | |
| kb.source | |
| FROM knowledge_base kb | |
| WHERE kb.metadata_ @> metadata_filter | |
| ORDER BY kb.created_at DESC | |
| LIMIT match_count; | |
| END; | |
| $$; | |
| -- Function to delete documents by metadata | |
| CREATE OR REPLACE FUNCTION delete_documents_by_metadata( | |
| metadata_filter jsonb | |
| ) | |
| RETURNS int | |
| LANGUAGE plpgsql | |
| AS $$ | |
| DECLARE | |
| deleted_count int; | |
| BEGIN | |
| DELETE FROM knowledge_base | |
| WHERE metadata_ @> metadata_filter; | |
| GET DIAGNOSTICS deleted_count = ROW_COUNT; | |
| RETURN deleted_count; | |
| END; | |
| $$; | |
| -- Create tool_metrics table for monitoring | |
| CREATE TABLE IF NOT EXISTS tool_metrics ( | |
| id BIGSERIAL PRIMARY KEY, | |
| tool_name TEXT NOT NULL, | |
| execution_time_ms INTEGER, | |
| success BOOLEAN, | |
| error_message TEXT, | |
| metadata JSONB DEFAULT '{}', | |
| created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() | |
| ); | |
| -- Index for tool metrics | |
| CREATE INDEX IF NOT EXISTS tool_metrics_tool_name_idx ON tool_metrics(tool_name); | |
| CREATE INDEX IF NOT EXISTS tool_metrics_created_at_idx ON tool_metrics(created_at); | |
| -- Function to record tool execution metrics | |
| CREATE OR REPLACE FUNCTION record_tool_metric( | |
| p_tool_name text, | |
| p_execution_time_ms integer, | |
| p_success boolean, | |
| p_error_message text DEFAULT NULL, | |
| p_metadata jsonb DEFAULT '{}' | |
| ) | |
| RETURNS bigint | |
| LANGUAGE plpgsql | |
| AS $$ | |
| DECLARE | |
| metric_id bigint; | |
| BEGIN | |
| INSERT INTO tool_metrics ( | |
| tool_name, | |
| execution_time_ms, | |
| success, | |
| error_message, | |
| metadata | |
| ) | |
| VALUES ( | |
| p_tool_name, | |
| p_execution_time_ms, | |
| p_success, | |
| p_error_message, | |
| p_metadata | |
| ) | |
| RETURNING id INTO metric_id; | |
| RETURN metric_id; | |
| END; | |
| $$; | |
| -- Function to get tool reliability statistics | |
| CREATE OR REPLACE FUNCTION get_tool_reliability_stats( | |
| days_back integer DEFAULT 7 | |
| ) | |
| RETURNS TABLE ( | |
| tool_name text, | |
| total_executions bigint, | |
| successful_executions bigint, | |
| success_rate float, | |
| avg_execution_time float, | |
| last_execution timestamp with time zone | |
| ) | |
| LANGUAGE plpgsql | |
| AS $$ | |
| BEGIN | |
| RETURN QUERY | |
| SELECT | |
| tm.tool_name, | |
| COUNT(*) as total_executions, | |
| COUNT(*) FILTER (WHERE tm.success) as successful_executions, | |
| ROUND( | |
| COUNT(*) FILTER (WHERE tm.success)::float / COUNT(*)::float * 100, | |
| 2 | |
| ) as success_rate, | |
| ROUND(AVG(tm.execution_time_ms), 2) as avg_execution_time, | |
| MAX(tm.created_at) as last_execution | |
| FROM tool_metrics tm | |
| WHERE tm.created_at >= NOW() - INTERVAL '1 day' * days_back | |
| GROUP BY tm.tool_name | |
| ORDER BY success_rate DESC; | |
| END; | |
| $$; | |
| -- Grant necessary permissions | |
| GRANT USAGE ON SCHEMA public TO anon, authenticated; | |
| GRANT ALL ON ALL TABLES IN SCHEMA public TO anon, authenticated; | |
| GRANT ALL ON ALL SEQUENCES IN SCHEMA public TO anon, authenticated; | |
| GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO anon, authenticated; | |
| -- ============================================ | |
| -- WORLD-CLASS SEARCH FUNCTION WITH YOUR FEATURES | |
| -- ============================================ | |
| CREATE OR REPLACE FUNCTION intelligent_search( | |
| query_text TEXT, | |
| query_embedding VECTOR(1536), | |
| match_count INT DEFAULT 5, | |
| filter JSONB DEFAULT '{}', | |
| use_clarification BOOLEAN DEFAULT TRUE, | |
| session_id UUID DEFAULT NULL | |
| ) RETURNS TABLE ( | |
| -- Search results | |
| id UUID, | |
| node_id TEXT, | |
| text TEXT, | |
| metadata_ JSONB, | |
| similarity FLOAT, | |
| rank INT, | |
| -- Intelligence features | |
| needs_clarification BOOLEAN, | |
| suggested_clarification TEXT, | |
| reliability_score FLOAT, | |
| freshness_score FLOAT | |
| ) | |
| LANGUAGE plpgsql | |
| AS $$ | |
| DECLARE | |
| clarification_needed BOOLEAN := FALSE; | |
| clarification_text TEXT; | |
| query_category TEXT; | |
| BEGIN | |
| -- Step 1: Check if we need clarification | |
| IF use_clarification THEN | |
| SELECT | |
| cp.clarification_question, | |
| cp.query_category | |
| INTO clarification_text, query_category | |
| FROM clarification_patterns cp | |
| WHERE 1 - (cp.query_embedding <=> query_embedding) > 0.85 | |
| ORDER BY cp.effectiveness_score DESC, cp.frequency DESC | |
| LIMIT 1; | |
| clarification_needed := (clarification_text IS NOT NULL); | |
| END IF; | |
| -- Step 2: Log query to session if provided | |
| IF session_id IS NOT NULL THEN | |
| UPDATE user_sessions | |
| SET | |
| total_queries = total_queries + 1, | |
| last_active_at = NOW(), | |
| conversation_history = conversation_history || | |
| jsonb_build_object( | |
| 'query', query_text, | |
| 'timestamp', NOW(), | |
| 'category', COALESCE(query_category, 'general') | |
| ) | |
| WHERE session_id = session_id; | |
| END IF; | |
| -- Step 3: Perform intelligent search with freshness and reliability | |
| RETURN QUERY | |
| WITH base_results AS ( | |
| SELECT | |
| kb.id, | |
| kb.node_id, | |
| kb.text, | |
| kb.metadata_, | |
| 1 - (kb.embedding <=> query_embedding) AS base_similarity, | |
| kb.lifecycle_id, | |
| kb.access_count | |
| FROM knowledge_base kb | |
| WHERE | |
| CASE | |
| WHEN filter = '{}' THEN TRUE | |
| ELSE kb.metadata_ @> filter | |
| END | |
| AND 1 - (kb.embedding <=> query_embedding) > 0.5 -- Minimum threshold | |
| ORDER BY kb.embedding <=> query_embedding | |
| LIMIT match_count * 3 -- Get extra for reranking | |
| ), | |
| enhanced_results AS ( | |
| SELECT | |
| br.*, | |
| -- Calculate freshness score | |
| CASE | |
| WHEN kl.validation_status = 'valid' THEN 1.0 | |
| WHEN kl.validation_status = 'stale' THEN 0.7 | |
| WHEN kl.validation_status = 'expired' THEN 0.3 | |
| ELSE 0.5 | |
| END * GREATEST(0, 1 - (EXTRACT(EPOCH FROM (NOW() - kl.last_validated_at)) / 86400 / 30)) AS freshness, | |
| -- Calculate reliability based on source | |
| CASE | |
| WHEN br.metadata_->>'source_type' = 'official' THEN 1.0 | |
| WHEN br.metadata_->>'source_type' = 'verified' THEN 0.9 | |
| WHEN br.metadata_->>'source_type' = 'community' THEN 0.7 | |
| ELSE 0.5 | |
| END AS source_reliability, | |
| -- Boost for frequently accessed | |
| 1 + (0.1 * LN(GREATEST(1, br.access_count))) AS popularity_boost | |
| FROM base_results br | |
| LEFT JOIN knowledge_lifecycle kl ON br.lifecycle_id = kl.document_id | |
| ), | |
| final_scored AS ( | |
| SELECT | |
| er.*, | |
| -- Combine all factors for final score | |
| er.base_similarity * | |
| er.freshness * | |
| er.source_reliability * | |
| er.popularity_boost AS final_score | |
| FROM enhanced_results er | |
| ) | |
| SELECT | |
| fs.id, | |
| fs.node_id, | |
| fs.text, | |
| fs.metadata_, | |
| fs.final_score AS similarity, | |
| ROW_NUMBER() OVER (ORDER BY fs.final_score DESC)::INT AS rank, | |
| clarification_needed AS needs_clarification, | |
| clarification_text AS suggested_clarification, | |
| fs.source_reliability AS reliability_score, | |
| fs.freshness AS freshness_score | |
| FROM final_scored fs | |
| ORDER BY fs.final_score DESC | |
| LIMIT match_count; | |
| -- Update access counts | |
| UPDATE knowledge_base | |
| SET | |
| access_count = access_count + 1, | |
| last_accessed_at = NOW() | |
| WHERE id IN ( | |
| SELECT id FROM final_scored | |
| ORDER BY final_score DESC | |
| LIMIT match_count | |
| ); | |
| END; | |
| $$; | |
| -- ============================================ | |
| -- TOOL RELIABILITY ENHANCEMENT | |
| -- ============================================ | |
| -- Add function to get best tool for a task | |
| CREATE OR REPLACE FUNCTION get_reliable_tool( | |
| tool_category TEXT, | |
| min_success_rate FLOAT DEFAULT 0.7 | |
| ) RETURNS TABLE ( | |
| tool_name TEXT, | |
| success_rate FLOAT, | |
| avg_latency_ms FLOAT, | |
| is_fallback BOOLEAN | |
| ) | |
| LANGUAGE plpgsql | |
| AS $$ | |
| BEGIN | |
| RETURN QUERY | |
| WITH tool_stats AS ( | |
| SELECT | |
| t.tool_name, | |
| CASE | |
| WHEN t.total_calls > 0 | |
| THEN t.success_count::FLOAT / t.total_calls | |
| ELSE 0.5 -- Default for new tools | |
| END AS success_rate, | |
| t.average_latency_ms, | |
| FALSE AS is_fallback | |
| FROM tool_reliability_metrics t | |
| WHERE t.tool_name LIKE tool_category || '%' | |
| AND t.last_used_at > NOW() - INTERVAL '30 days' | |
| ) | |
| SELECT * FROM tool_stats | |
| WHERE success_rate >= min_success_rate | |
| UNION ALL | |
| -- Include fallback tools if main tools are unreliable | |
| SELECT | |
| ft.tool_name, | |
| 0.5 AS success_rate, | |
| 1000.0 AS avg_latency_ms, | |
| TRUE AS is_fallback | |
| FROM tool_reliability_metrics t | |
| CROSS JOIN LATERAL jsonb_array_elements_text(t.fallback_tools) AS ft(tool_name) | |
| WHERE t.tool_name LIKE tool_category || '%' | |
| AND NOT EXISTS ( | |
| SELECT 1 FROM tool_stats ts | |
| WHERE ts.success_rate >= min_success_rate | |
| ) | |
| ORDER BY is_fallback ASC, success_rate DESC, avg_latency_ms ASC | |
| LIMIT 1; | |
| END; | |
| $$; | |
| -- ============================================ | |
| -- CLARIFICATION LEARNING SYSTEM | |
| -- ============================================ | |
| CREATE OR REPLACE FUNCTION learn_from_clarification( | |
| p_original_query TEXT, | |
| p_query_embedding VECTOR(1536), | |
| p_clarification_question TEXT, | |
| p_user_response TEXT, | |
| p_was_helpful BOOLEAN | |
| ) RETURNS VOID | |
| LANGUAGE plpgsql | |
| AS $$ | |
| DECLARE | |
| pattern_id TEXT; | |
| BEGIN | |
| pattern_id := encode(digest(p_original_query || p_clarification_question, 'sha256'), 'hex'); | |
| INSERT INTO clarification_patterns ( | |
| id, | |
| original_query, | |
| query_embedding, | |
| clarification_question, | |
| user_response, | |
| query_category, | |
| effectiveness_score | |
| ) VALUES ( | |
| pattern_id, | |
| p_original_query, | |
| p_query_embedding, | |
| p_clarification_question, | |
| p_user_response, | |
| 'general', -- You might want to classify this | |
| CASE WHEN p_was_helpful THEN 0.8 ELSE 0.2 END | |
| ) | |
| ON CONFLICT (id) DO UPDATE | |
| SET | |
| frequency = clarification_patterns.frequency + 1, | |
| effectiveness_score = | |
| (clarification_patterns.effectiveness_score * clarification_patterns.frequency + | |
| CASE WHEN p_was_helpful THEN 1.0 ELSE 0.0 END) / | |
| (clarification_patterns.frequency + 1), | |
| last_seen_at = NOW(); | |
| END; | |
| $$; | |
| -- ============================================ | |
| -- KNOWLEDGE LIFECYCLE AUTOMATION | |
| -- ============================================ | |
| -- Function to automatically mark stale documents | |
| CREATE OR REPLACE FUNCTION update_stale_documents() | |
| RETURNS INTEGER | |
| LANGUAGE plpgsql | |
| AS $$ | |
| DECLARE | |
| updated_count INTEGER; | |
| BEGIN | |
| UPDATE knowledge_lifecycle | |
| SET | |
| validation_status = | |
| CASE | |
| WHEN expires_at < NOW() THEN 'expired' | |
| WHEN last_validated_at < NOW() - (update_frequency_days || ' days')::INTERVAL THEN 'stale' | |
| ELSE validation_status | |
| END, | |
| updated_at = NOW() | |
| WHERE validation_status IN ('valid', 'stale') | |
| AND ( | |
| expires_at < NOW() | |
| OR last_validated_at < NOW() - (update_frequency_days || ' days')::INTERVAL | |
| ); | |
| GET DIAGNOSTICS updated_count = ROW_COUNT; | |
| RETURN updated_count; | |
| END; | |
| $$; | |
| -- ============================================ | |
| -- SESSION ANALYTICS | |
| -- ============================================ | |
| CREATE OR REPLACE VIEW session_analytics AS | |
| SELECT | |
| user_id, | |
| COUNT(DISTINCT session_id) as total_sessions, | |
| SUM(total_queries) as total_queries, | |
| AVG(CASE WHEN total_queries > 0 | |
| THEN successful_queries::FLOAT / total_queries | |
| ELSE 0 END) as avg_success_rate, | |
| AVG(average_steps_per_query) as avg_steps, | |
| MAX(last_active_at) as last_seen | |
| FROM user_sessions | |
| GROUP BY user_id; | |
| -- ============================================ | |
| -- ERROR PATTERN DETECTION | |
| -- ============================================ | |
| CREATE OR REPLACE FUNCTION detect_error_patterns() | |
| RETURNS TABLE ( | |
| error_type TEXT, | |
| frequency INTEGER, | |
| common_queries TEXT[], | |
| suggested_fix TEXT | |
| ) | |
| LANGUAGE plpgsql | |
| AS $$ | |
| BEGIN | |
| RETURN QUERY | |
| WITH error_analysis AS ( | |
| SELECT | |
| CASE | |
| WHEN state_hash IN ( | |
| SELECT state_hash | |
| FROM recursion_error_logs | |
| GROUP BY state_hash | |
| HAVING COUNT(*) > 3 | |
| ) THEN 'recurring_loop' | |
| WHEN loop_count > 10 THEN 'excessive_iteration' | |
| WHEN stagnation_score > 5 THEN 'stagnation_pattern' | |
| ELSE 'other' | |
| END AS error_type, | |
| query, | |
| resolution_strategy | |
| FROM recursion_error_logs | |
| WHERE created_at > NOW() - INTERVAL '7 days' | |
| ) | |
| SELECT | |
| ea.error_type, | |
| COUNT(*)::INTEGER as frequency, | |
| ARRAY_AGG(DISTINCT LEFT(ea.query, 50)) as common_queries, | |
| CASE ea.error_type | |
| WHEN 'recurring_loop' THEN 'Implement state diversity check' | |
| WHEN 'excessive_iteration' THEN 'Add iteration limits to planning' | |
| WHEN 'stagnation_pattern' THEN 'Force tool switching after stagnation' | |
| ELSE 'Review agent logs for pattern' | |
| END as suggested_fix | |
| FROM error_analysis ea | |
| GROUP BY ea.error_type | |
| HAVING COUNT(*) > 2 | |
| ORDER BY COUNT(*) DESC; | |
| END; | |
| $$; | |
| -- ============================================ | |
| -- PERFORMANCE MONITORING DASHBOARD | |
| -- ============================================ | |
| CREATE OR REPLACE VIEW agent_performance_dashboard AS | |
| WITH recent_metrics AS ( | |
| SELECT * FROM tool_reliability_metrics | |
| WHERE last_used_at > NOW() - INTERVAL '24 hours' | |
| ) | |
| SELECT | |
| 'Active Tools' as metric, | |
| COUNT(DISTINCT tool_name)::TEXT as value, | |
| 'tools' as unit | |
| FROM recent_metrics | |
| UNION ALL | |
| SELECT | |
| 'Average Success Rate', | |
| ROUND(AVG(CASE WHEN total_calls > 0 | |
| THEN success_count::FLOAT / total_calls | |
| ELSE 0 END) * 100, 1)::TEXT, | |
| '%' | |
| FROM recent_metrics | |
| UNION ALL | |
| SELECT | |
| 'Pending Approvals', | |
| COUNT(*)::TEXT, | |
| 'requests' | |
| FROM human_approval_requests | |
| WHERE approval_status = 'pending' | |
| UNION ALL | |
| SELECT | |
| 'Knowledge Freshness', | |
| ROUND(100.0 * COUNT(CASE WHEN validation_status = 'valid' THEN 1 END) / | |
| NULLIF(COUNT(*), 0), 1)::TEXT, | |
| '%' | |
| FROM knowledge_lifecycle; |