File size: 6,072 Bytes
8de5b21 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
-- Sample data for testing SanadCheck LLM API
-- Execute after creating tables
-- Insert sample users (these will be created through the auth system)
-- This is just for reference - actual users will be created via the API
-- Sample narrator extractions for testing
INSERT INTO public.narrator_extractions (
id,
user_id,
hadith_text,
extracted_narrators,
sanad_chain,
success,
processing_time_ms,
created_at
) VALUES
-- Note: Replace with actual user UUIDs after creating users through the API
-- (
-- uuid_generate_v4(),
-- 'user-uuid-here',
-- 'حدثنا محمد بن إسماعيل قال حدثنا عبد الله بن موسى قال أخبرنا إسرائيل عن أبي إسحاق عن البراء قال...',
-- ARRAY['محمد بن إسماعيل', 'عبد الله بن موسى', 'إسرائيل', 'أبو إسحاق', 'البراء'],
-- 'محمد بن إسماعيل ← عبد الله بن موسى ← إسرائيل ← أبي إسحاق ← البراء',
-- true,
-- 1500,
-- NOW() - INTERVAL '1 day'
-- );
-- Create some utility functions for data analysis
-- Function to get extraction statistics
CREATE OR REPLACE FUNCTION public.get_extraction_stats()
RETURNS TABLE (
total_extractions BIGINT,
successful_extractions BIGINT,
failed_extractions BIGINT,
success_rate NUMERIC,
avg_processing_time NUMERIC,
total_users BIGINT
) AS $$
BEGIN
RETURN QUERY
SELECT
COUNT(*) as total_extractions,
COUNT(*) FILTER (WHERE success = true) as successful_extractions,
COUNT(*) FILTER (WHERE success = false) as failed_extractions,
ROUND(
(COUNT(*) FILTER (WHERE success = true)::NUMERIC / COUNT(*)::NUMERIC) * 100,
2
) as success_rate,
ROUND(AVG(processing_time_ms), 2) as avg_processing_time,
COUNT(DISTINCT user_id) as total_users
FROM public.narrator_extractions;
END;
$$ LANGUAGE plpgsql;
-- Function to get most analyzed narrators
CREATE OR REPLACE FUNCTION public.get_popular_narrators(limit_count INTEGER DEFAULT 10)
RETURNS TABLE (
narrator_name TEXT,
analysis_count BIGINT,
avg_reliability_grade TEXT,
most_common_grade TEXT
) AS $$
BEGIN
RETURN QUERY
SELECT
na.narrator_name,
COUNT(*) as analysis_count,
MODE() WITHIN GROUP (ORDER BY na.reliability_grade) as most_common_grade,
ROUND(AVG(
CASE
WHEN na.reliability_grade = 'Thiqah' THEN 5
WHEN na.reliability_grade = 'Saduq' THEN 4
WHEN na.reliability_grade = 'Da''if' THEN 2
WHEN na.reliability_grade = 'Matruk' THEN 1
ELSE 0
END
), 2)::TEXT as avg_reliability_grade
FROM public.narrator_analyses na
WHERE na.success = true
GROUP BY na.narrator_name
ORDER BY analysis_count DESC
LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;
-- Function to get user activity summary
CREATE OR REPLACE FUNCTION public.get_user_activity(user_uuid UUID)
RETURNS TABLE (
total_extractions BIGINT,
successful_extractions BIGINT,
total_analyses BIGINT,
successful_analyses BIGINT,
first_activity TIMESTAMP WITH TIME ZONE,
last_activity TIMESTAMP WITH TIME ZONE,
avg_processing_time NUMERIC
) AS $$
BEGIN
RETURN QUERY
SELECT
COUNT(ne.id) as total_extractions,
COUNT(ne.id) FILTER (WHERE ne.success = true) as successful_extractions,
COUNT(na.id) as total_analyses,
COUNT(na.id) FILTER (WHERE na.success = true) as successful_analyses,
MIN(LEAST(ne.created_at, na.created_at)) as first_activity,
MAX(GREATEST(ne.created_at, na.created_at)) as last_activity,
ROUND(AVG(COALESCE(ne.processing_time_ms, na.processing_time_ms)), 2) as avg_processing_time
FROM public.narrator_extractions ne
FULL OUTER JOIN public.narrator_analyses na ON ne.user_id = na.user_id
WHERE ne.user_id = user_uuid OR na.user_id = user_uuid;
END;
$$ LANGUAGE plpgsql;
-- Create indexes for common queries
CREATE INDEX IF NOT EXISTS idx_narrator_extractions_text_search ON public.narrator_extractions
USING gin(to_tsvector('arabic', hadith_text));
CREATE INDEX IF NOT EXISTS idx_narrator_analyses_name_search ON public.narrator_analyses
USING gin(to_tsvector('arabic', narrator_name));
-- Grant execute permissions on functions
GRANT EXECUTE ON FUNCTION public.get_extraction_stats() TO authenticated;
GRANT EXECUTE ON FUNCTION public.get_popular_narrators(INTEGER) TO authenticated;
GRANT EXECUTE ON FUNCTION public.get_user_activity(UUID) TO authenticated;
-- Create a cleanup function for old sessions
CREATE OR REPLACE FUNCTION public.cleanup_expired_sessions()
RETURNS INTEGER AS $$
DECLARE
deleted_count INTEGER;
BEGIN
-- Delete sessions that have been expired for more than 7 days
DELETE FROM public.user_sessions
WHERE expires_at < NOW() - INTERVAL '7 days';
GET DIAGNOSTICS deleted_count = ROW_COUNT;
RETURN deleted_count;
END;
$$ LANGUAGE plpgsql;
-- Grant execute permission
GRANT EXECUTE ON FUNCTION public.cleanup_expired_sessions() TO authenticated;
-- Comments for documentation
COMMENT ON TABLE public.users IS 'Extended user profiles linked to Supabase auth.users';
COMMENT ON TABLE public.user_sessions IS 'JWT session tracking for security and analytics';
COMMENT ON TABLE public.narrator_extractions IS 'Records of hadith narrator extraction requests and results';
COMMENT ON TABLE public.narrator_analyses IS 'Records of individual narrator analysis requests and results';
COMMENT ON FUNCTION public.get_extraction_stats() IS 'Returns overall statistics about extraction operations';
COMMENT ON FUNCTION public.get_popular_narrators(INTEGER) IS 'Returns most frequently analyzed narrators';
COMMENT ON FUNCTION public.get_user_activity(UUID) IS 'Returns activity summary for a specific user';
COMMENT ON FUNCTION public.cleanup_expired_sessions() IS 'Removes old expired sessions to keep the database clean';
|