SanadLLM / sql /sample_data.sql
Hydra-Bolt
added
8de5b21
-- Sample data for testing SanadCheck LLM API
-- Execute after creating tables
-- Insert sample users (these will be created through the auth system)
-- This is just for reference - actual users will be created via the API
-- Sample narrator extractions for testing
INSERT INTO public.narrator_extractions (
id,
user_id,
hadith_text,
extracted_narrators,
sanad_chain,
success,
processing_time_ms,
created_at
) VALUES
-- Note: Replace with actual user UUIDs after creating users through the API
-- (
-- uuid_generate_v4(),
-- 'user-uuid-here',
-- 'حدثنا محمد بن إسماعيل قال حدثنا عبد الله بن موسى قال أخبرنا إسرائيل عن أبي إسحاق عن البراء قال...',
-- ARRAY['محمد بن إسماعيل', 'عبد الله بن موسى', 'إسرائيل', 'أبو إسحاق', 'البراء'],
-- 'محمد بن إسماعيل ← عبد الله بن موسى ← إسرائيل ← أبي إسحاق ← البراء',
-- true,
-- 1500,
-- NOW() - INTERVAL '1 day'
-- );
-- Create some utility functions for data analysis
-- Function to get extraction statistics
CREATE OR REPLACE FUNCTION public.get_extraction_stats()
RETURNS TABLE (
total_extractions BIGINT,
successful_extractions BIGINT,
failed_extractions BIGINT,
success_rate NUMERIC,
avg_processing_time NUMERIC,
total_users BIGINT
) AS $$
BEGIN
RETURN QUERY
SELECT
COUNT(*) as total_extractions,
COUNT(*) FILTER (WHERE success = true) as successful_extractions,
COUNT(*) FILTER (WHERE success = false) as failed_extractions,
ROUND(
(COUNT(*) FILTER (WHERE success = true)::NUMERIC / COUNT(*)::NUMERIC) * 100,
2
) as success_rate,
ROUND(AVG(processing_time_ms), 2) as avg_processing_time,
COUNT(DISTINCT user_id) as total_users
FROM public.narrator_extractions;
END;
$$ LANGUAGE plpgsql;
-- Function to get most analyzed narrators
CREATE OR REPLACE FUNCTION public.get_popular_narrators(limit_count INTEGER DEFAULT 10)
RETURNS TABLE (
narrator_name TEXT,
analysis_count BIGINT,
avg_reliability_grade TEXT,
most_common_grade TEXT
) AS $$
BEGIN
RETURN QUERY
SELECT
na.narrator_name,
COUNT(*) as analysis_count,
MODE() WITHIN GROUP (ORDER BY na.reliability_grade) as most_common_grade,
ROUND(AVG(
CASE
WHEN na.reliability_grade = 'Thiqah' THEN 5
WHEN na.reliability_grade = 'Saduq' THEN 4
WHEN na.reliability_grade = 'Da''if' THEN 2
WHEN na.reliability_grade = 'Matruk' THEN 1
ELSE 0
END
), 2)::TEXT as avg_reliability_grade
FROM public.narrator_analyses na
WHERE na.success = true
GROUP BY na.narrator_name
ORDER BY analysis_count DESC
LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;
-- Function to get user activity summary
CREATE OR REPLACE FUNCTION public.get_user_activity(user_uuid UUID)
RETURNS TABLE (
total_extractions BIGINT,
successful_extractions BIGINT,
total_analyses BIGINT,
successful_analyses BIGINT,
first_activity TIMESTAMP WITH TIME ZONE,
last_activity TIMESTAMP WITH TIME ZONE,
avg_processing_time NUMERIC
) AS $$
BEGIN
RETURN QUERY
SELECT
COUNT(ne.id) as total_extractions,
COUNT(ne.id) FILTER (WHERE ne.success = true) as successful_extractions,
COUNT(na.id) as total_analyses,
COUNT(na.id) FILTER (WHERE na.success = true) as successful_analyses,
MIN(LEAST(ne.created_at, na.created_at)) as first_activity,
MAX(GREATEST(ne.created_at, na.created_at)) as last_activity,
ROUND(AVG(COALESCE(ne.processing_time_ms, na.processing_time_ms)), 2) as avg_processing_time
FROM public.narrator_extractions ne
FULL OUTER JOIN public.narrator_analyses na ON ne.user_id = na.user_id
WHERE ne.user_id = user_uuid OR na.user_id = user_uuid;
END;
$$ LANGUAGE plpgsql;
-- Create indexes for common queries
CREATE INDEX IF NOT EXISTS idx_narrator_extractions_text_search ON public.narrator_extractions
USING gin(to_tsvector('arabic', hadith_text));
CREATE INDEX IF NOT EXISTS idx_narrator_analyses_name_search ON public.narrator_analyses
USING gin(to_tsvector('arabic', narrator_name));
-- Grant execute permissions on functions
GRANT EXECUTE ON FUNCTION public.get_extraction_stats() TO authenticated;
GRANT EXECUTE ON FUNCTION public.get_popular_narrators(INTEGER) TO authenticated;
GRANT EXECUTE ON FUNCTION public.get_user_activity(UUID) TO authenticated;
-- Create a cleanup function for old sessions
CREATE OR REPLACE FUNCTION public.cleanup_expired_sessions()
RETURNS INTEGER AS $$
DECLARE
deleted_count INTEGER;
BEGIN
-- Delete sessions that have been expired for more than 7 days
DELETE FROM public.user_sessions
WHERE expires_at < NOW() - INTERVAL '7 days';
GET DIAGNOSTICS deleted_count = ROW_COUNT;
RETURN deleted_count;
END;
$$ LANGUAGE plpgsql;
-- Grant execute permission
GRANT EXECUTE ON FUNCTION public.cleanup_expired_sessions() TO authenticated;
-- Comments for documentation
COMMENT ON TABLE public.users IS 'Extended user profiles linked to Supabase auth.users';
COMMENT ON TABLE public.user_sessions IS 'JWT session tracking for security and analytics';
COMMENT ON TABLE public.narrator_extractions IS 'Records of hadith narrator extraction requests and results';
COMMENT ON TABLE public.narrator_analyses IS 'Records of individual narrator analysis requests and results';
COMMENT ON FUNCTION public.get_extraction_stats() IS 'Returns overall statistics about extraction operations';
COMMENT ON FUNCTION public.get_popular_narrators(INTEGER) IS 'Returns most frequently analyzed narrators';
COMMENT ON FUNCTION public.get_user_activity(UUID) IS 'Returns activity summary for a specific user';
COMMENT ON FUNCTION public.cleanup_expired_sessions() IS 'Removes old expired sessions to keep the database clean';