bentann committed on
Commit
b2a0847
·
1 Parent(s): f17910a

Add ShareLM dataset analysis dashboard

Browse files
.gitignore ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dependencies
2
+ /node_modules
3
+ /.pnp
4
+ .pnp.js
5
+
6
+ # testing
7
+ /coverage
8
+
9
+ # next.js
10
+ /.next/
11
+ /out/
12
+
13
+ # production
14
+ /build
15
+
16
+ # misc
17
+ .DS_Store
18
+ *.pem
19
+
20
+ # debug
21
+ npm-debug.log*
22
+ yarn-debug.log*
23
+ yarn-error.log*
24
+
25
+ # local env files
26
+ .env*.local
27
+ .env
28
+
29
+ # vercel
30
+ .vercel
31
+
32
+ # typescript
33
+ *.tsbuildinfo
34
+ next-env.d.ts
README.md CHANGED
@@ -1,2 +1,54 @@
1
- # shareLM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
 
1
+ # ShareLM Dataset Analysis Dashboard
2
+
3
+ A Next.js application for analyzing the ShareLM Hugging Face dataset (12GB) with interactive visualizations. The app uses serverless functions to process the dataset and displays:
4
+
5
+ - **Doughnut chart**: Breakdown of conversations by source
6
+ - **Time series chart**: Total count of conversations over time
7
+
8
+ ## Features
9
+
10
+ - Streams and processes large datasets efficiently using Hugging Face Datasets Server API
11
+ - In-memory caching to avoid reprocessing on every request
12
+ - Responsive dashboard with beautiful charts using Recharts
13
+ - Error handling and timeout management for large dataset processing
14
+ - Deployed on Vercel serverless functions
15
+
16
+ ## Setup
17
+
18
+ 1. Install dependencies:
19
+ ```bash
20
+ npm install
21
+ ```
22
+
23
+ 2. Run the development server:
24
+ ```bash
25
+ npm run dev
26
+ ```
27
+
28
+ 3. Open [http://localhost:3000](http://localhost:3000) in your browser
29
+
30
+ ## Deployment to Vercel
31
+
32
+ 1. Push your code to GitHub
33
+ 2. Import the project in Vercel
34
+ 3. Deploy (no environment variables needed for basic functionality)
35
+
36
+ The app will automatically:
37
+ - Cache aggregated statistics to avoid reprocessing
38
+ - Handle timeouts gracefully by processing smaller samples
39
+ - Display loading states and error messages
40
+
41
+ ## Technical Details
42
+
43
+ - **Framework**: Next.js 14 with TypeScript
44
+ - **Charts**: Recharts
45
+ - **Data Source**: Hugging Face Datasets Server API
46
+ - **Caching**: In-memory cache (1 hour TTL)
47
+ - **Timeout Management**: 25-second timeout with fallback to smaller samples
48
+
49
+ ## Notes
50
+
51
+ - Due to Vercel's serverless function timeout limits (10s free, 50s pro), the app processes a sample of the dataset (up to 10,000 rows, falling back to 5,000 rows if the first attempt times out) rather than the full 3.5M rows
52
+ - Results are cached for 1 hour to improve performance
53
+ - The dataset is accessed via Hugging Face's Datasets Server API
54
 
app/api/source-breakdown/route.ts ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { NextResponse } from 'next/server';
2
+
3
+ // In-memory cache
4
+ let cachedBreakdown: {
5
+ data: Array<{ name: string; value: number }>;
6
+ lastUpdated: number;
7
+ } | null = null;
8
+
9
+ const CACHE_TTL = 60 * 60 * 1000; // 1 hour
10
+
11
// Dataset viewer endpoint that returns actual row data. The `/parquet` endpoint
// only lists the converted Parquet files and has no `offset`/`length` parameters,
// so it never yields rows to aggregate.
const HF_DATASET_API = 'https://datasets-server.huggingface.co/rows';

// `/rows` serves at most 100 rows per request, so a sample is collected page by page.
const HF_ROWS_PAGE_SIZE = 100;
// Number of page requests issued in parallel per batch.
const HF_ROWS_CONCURRENCY = 10;
// Overall time budget (ms) for collecting one sample, including reading response bodies.
const HF_FETCH_TIMEOUT_MS = 25000;

/**
 * Collects the first `maxRows` rows of the ShareLM train split from the
 * Hugging Face dataset viewer.
 *
 * Rows are requested in pages of `HF_ROWS_PAGE_SIZE`, `HF_ROWS_CONCURRENCY`
 * pages at a time, and collection stops early when a page comes back shorter
 * than requested (end of the split).
 *
 * @param maxRows Upper bound on the number of rows to collect.
 * @returns An object of the form `{ rows }`, where each entry is the row
 *   envelope returned by the API (the row's columns are under its `row` key).
 * @throws Error whose message contains "timeout" when the time budget is
 *   exhausted; Error with the HTTP status when a page request fails.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- callers probe several response shapes
async function fetchDatasetSample(maxRows: number = 50000): Promise<any> {
  const rowLimit = Math.max(0, Math.floor(maxRows));
  const controller = new AbortController();
  // A single timer bounds the whole collection, not just the first response.
  const timeoutId = setTimeout(() => controller.abort(), HF_FETCH_TIMEOUT_MS);

  // Fetches one page and reports whether it was completely filled.
  const fetchPage = async (offset: number): Promise<{ rows: unknown[]; full: boolean }> => {
    const length = Math.min(HF_ROWS_PAGE_SIZE, rowLimit - offset);
    const url = `${HF_DATASET_API}?dataset=shachardon%2FShareLM&config=default&split=train&offset=${offset}&length=${length}`;

    const response = await fetch(url, {
      headers: {
        'Accept': 'application/json',
      },
      signal: controller.signal,
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const page = await response.json();
    const rows: unknown[] = Array.isArray(page?.rows) ? page.rows : [];
    return { rows, full: rows.length >= length };
  };

  try {
    const collected: unknown[] = [];
    const batchSpan = HF_ROWS_PAGE_SIZE * HF_ROWS_CONCURRENCY;

    for (let batchStart = 0; batchStart < rowLimit; batchStart += batchSpan) {
      const batchEnd = Math.min(batchStart + batchSpan, rowLimit);
      const offsets: number[] = [];
      for (let offset = batchStart; offset < batchEnd; offset += HF_ROWS_PAGE_SIZE) {
        offsets.push(offset);
      }

      // Promise.all keeps the pages in offset order.
      const pages = await Promise.all(offsets.map((offset) => fetchPage(offset)));
      for (const page of pages) {
        collected.push(...page.rows);
      }

      // A short page means the split has no more rows.
      if (pages.some((page) => !page.full)) {
        break;
      }
    }

    return { rows: collected };
  } catch (error) {
    // At this point the signal is only aborted if the timer fired.
    if (controller.signal.aborted || (error instanceof Error && error.name === 'AbortError')) {
      throw new Error('Request timeout - dataset is too large to process in time limit');
    }
    console.error('Error fetching from HF API:', error);
    throw error;
  } finally {
    clearTimeout(timeoutId);
    // Cancel any sibling page requests still in flight after a failure.
    controller.abort();
  }
}
43
+
44
/**
 * GET handler: returns the number of sampled conversations per `source`,
 * sorted from most to least frequent.
 *
 * A fresh in-memory result (younger than `CACHE_TTL`) is served as
 * `{ data, cached: true }`. Otherwise a sample is fetched and aggregated and
 * the response is `{ data, processedCount, cached: false }`. Any failure is
 * reported as a 500 with `{ error, details }`.
 */
export async function GET() {
  try {
    // Check cache first
    if (cachedBreakdown && Date.now() - cachedBreakdown.lastUpdated < CACHE_TTL) {
      return NextResponse.json({
        data: cachedBreakdown.data,
        cached: true,
      });
    }

    // Fetch dataset sample; on timeout retry once with half the rows.
    // NOTE(review): a retry after a full timeout may not fit in the function's
    // configured maximum duration - confirm against the deployment settings.
    let datasetData;
    let maxRows = 10000;

    try {
      datasetData = await fetchDatasetSample(maxRows);
    } catch (error) {
      if (error instanceof Error && error.message.includes('timeout')) {
        maxRows = 5000;
        datasetData = await fetchDatasetSample(maxRows);
      } else {
        throw error;
      }
    }

    // Accept the known response shapes, but only when they really hold an array.
    let rows: any[] = [];
    if (Array.isArray(datasetData?.rows)) {
      rows = datasetData.rows;
    } else if (Array.isArray(datasetData)) {
      rows = datasetData;
    } else if (Array.isArray(datasetData?.data)) {
      rows = datasetData.data;
    }

    // A Map keeps counting correct for source names that collide with
    // Object.prototype members (e.g. "constructor", "__proto__").
    const sourceCounts = new Map<string, number>();

    for (const row of rows) {
      // Rows may arrive wrapped as `{ row: {...} }` or as the bare record.
      const rowData = row?.row || row;
      const rawSource = rowData !== null && typeof rowData === 'object' ? rowData.source : undefined;
      const source = rawSource ? String(rawSource) : 'unknown';
      sourceCounts.set(source, (sourceCounts.get(source) ?? 0) + 1);
    }

    const processedCount = rows.length;

    // Convert to array format for chart, largest slice first
    const data = Array.from(sourceCounts, ([name, value]) => ({ name, value }))
      .sort((a, b) => b.value - a.value);

    // Only cache real results; caching an empty aggregate would pin a
    // "no data" dashboard for the whole TTL after a transient upstream problem.
    if (processedCount > 0) {
      cachedBreakdown = {
        data,
        lastUpdated: Date.now(),
      };
    }

    return NextResponse.json({
      data,
      processedCount,
      cached: false,
    });
  } catch (error) {
    console.error('Error processing source breakdown:', error);
    return NextResponse.json(
      { error: 'Failed to process dataset', details: error instanceof Error ? error.message : 'Unknown error' },
      { status: 500 }
    );
  }
}
app/api/stats/route.ts ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { NextResponse } from 'next/server';
2
+
3
+ // In-memory cache (will be reset on serverless function restart)
4
+ let cachedStats: {
5
+ sourceBreakdown: Record<string, number>;
6
+ timeSeries: Record<string, number>;
7
+ lastUpdated: number;
8
+ } | null = null;
9
+
10
+ const CACHE_TTL = 60 * 60 * 1000; // 1 hour in milliseconds
11
+
12
// Hugging Face dataset viewer endpoint that returns actual row data. The
// `/parquet` endpoint only lists the converted Parquet files and has no
// `offset`/`length` parameters, so it never yields rows to aggregate.
const HF_DATASET_API = 'https://datasets-server.huggingface.co/rows';

// `/rows` serves at most 100 rows per request, so a sample is collected page by page.
const HF_ROWS_PAGE_SIZE = 100;
// Number of page requests issued in parallel per batch.
const HF_ROWS_CONCURRENCY = 10;
// Overall time budget (ms) for collecting one sample, including reading response bodies.
const HF_FETCH_TIMEOUT_MS = 25000;

/**
 * Collects the first `maxRows` rows of the ShareLM train split from the
 * Hugging Face dataset viewer. Only a sample is read so the request can
 * finish within the time budget.
 *
 * Rows are requested in pages of `HF_ROWS_PAGE_SIZE`, `HF_ROWS_CONCURRENCY`
 * pages at a time, and collection stops early when a page comes back shorter
 * than requested (end of the split).
 *
 * @param maxRows Upper bound on the number of rows to collect.
 * @returns An object of the form `{ rows }`, where each entry is the row
 *   envelope returned by the API (the row's columns are under its `row` key).
 * @throws Error whose message contains "timeout" when the time budget is
 *   exhausted; Error with the HTTP status when a page request fails.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- callers probe several response shapes
async function fetchDatasetSample(maxRows: number = 50000): Promise<any> {
  const rowLimit = Math.max(0, Math.floor(maxRows));
  const controller = new AbortController();
  // A single timer bounds the whole collection, not just the first response.
  const timeoutId = setTimeout(() => controller.abort(), HF_FETCH_TIMEOUT_MS);

  // Fetches one page and reports whether it was completely filled.
  const fetchPage = async (offset: number): Promise<{ rows: unknown[]; full: boolean }> => {
    const length = Math.min(HF_ROWS_PAGE_SIZE, rowLimit - offset);
    const url = `${HF_DATASET_API}?dataset=shachardon%2FShareLM&config=default&split=train&offset=${offset}&length=${length}`;

    const response = await fetch(url, {
      headers: {
        'Accept': 'application/json',
      },
      signal: controller.signal,
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const page = await response.json();
    const rows: unknown[] = Array.isArray(page?.rows) ? page.rows : [];
    return { rows, full: rows.length >= length };
  };

  try {
    const collected: unknown[] = [];
    const batchSpan = HF_ROWS_PAGE_SIZE * HF_ROWS_CONCURRENCY;

    for (let batchStart = 0; batchStart < rowLimit; batchStart += batchSpan) {
      const batchEnd = Math.min(batchStart + batchSpan, rowLimit);
      const offsets: number[] = [];
      for (let offset = batchStart; offset < batchEnd; offset += HF_ROWS_PAGE_SIZE) {
        offsets.push(offset);
      }

      // Promise.all keeps the pages in offset order.
      const pages = await Promise.all(offsets.map((offset) => fetchPage(offset)));
      for (const page of pages) {
        collected.push(...page.rows);
      }

      // A short page means the split has no more rows.
      if (pages.some((page) => !page.full)) {
        break;
      }
    }

    return { rows: collected };
  } catch (error) {
    // At this point the signal is only aborted if the timer fired.
    if (controller.signal.aborted || (error instanceof Error && error.name === 'AbortError')) {
      throw new Error('Request timeout - dataset is too large to process in time limit');
    }
    console.error('Error fetching from HF API:', error);
    throw error;
  } finally {
    clearTimeout(timeoutId);
    // Cancel any sibling page requests still in flight after a failure.
    controller.abort();
  }
}
47
+
48
/**
 * GET handler: aggregates a sample of the dataset into per-source counts and
 * per-day (YYYY-MM-DD, UTC) conversation counts.
 *
 * A fresh in-memory result (younger than `CACHE_TTL`) is served as
 * `{ sourceBreakdown, timeSeries, cached: true }`. Otherwise a sample is
 * fetched and aggregated and the response additionally carries
 * `processedCount`, `sampleSize` and `note`. Any failure is reported as a 500
 * with `{ error, details }`.
 */
export async function GET() {
  try {
    // Check cache first
    if (cachedStats && Date.now() - cachedStats.lastUpdated < CACHE_TTL) {
      return NextResponse.json({
        sourceBreakdown: cachedStats.sourceBreakdown,
        timeSeries: cachedStats.timeSeries,
        cached: true,
      });
    }

    // Fetch dataset sample; on timeout retry once with half the rows.
    // NOTE(review): a retry after a full timeout may not fit in the function's
    // configured maximum duration - confirm against the deployment settings.
    let datasetData;
    let maxRows = 10000;

    try {
      datasetData = await fetchDatasetSample(maxRows);
    } catch (error) {
      if (error instanceof Error && error.message.includes('timeout')) {
        maxRows = 5000;
        datasetData = await fetchDatasetSample(maxRows);
      } else {
        throw error;
      }
    }

    // Accept the known response shapes, but only when they really hold an array.
    let rows: any[] = [];
    if (Array.isArray(datasetData?.rows)) {
      rows = datasetData.rows;
    } else if (Array.isArray(datasetData)) {
      rows = datasetData;
    } else if (Array.isArray(datasetData?.data)) {
      rows = datasetData.data;
    }

    // Maps keep counting correct for keys that collide with Object.prototype
    // members (e.g. a source literally named "constructor").
    const sourceCounts = new Map<string, number>();
    const dayCounts = new Map<string, number>();

    for (const row of rows) {
      // Rows may arrive wrapped as `{ row: {...} }` or as the bare record.
      const candidate = row?.row || row;
      const rowData = candidate !== null && typeof candidate === 'object' ? candidate : {};

      // Aggregate by source
      const source = rowData.source ? String(rowData.source) : 'unknown';
      sourceCounts.set(source, (sourceCounts.get(source) ?? 0) + 1);

      // Aggregate by timestamp (group by date); unparseable timestamps are skipped
      if (rowData.timestamp) {
        const date = new Date(rowData.timestamp);
        if (!isNaN(date.getTime())) {
          const dateKey = date.toISOString().split('T')[0]; // YYYY-MM-DD
          dayCounts.set(dateKey, (dayCounts.get(dateKey) ?? 0) + 1);
        }
      }
    }

    const processedCount = rows.length;

    const sourceBreakdown: Record<string, number> = Object.fromEntries(sourceCounts);

    // ISO date keys sort chronologically when sorted as strings.
    const sortedTimeSeries: Record<string, number> = Object.fromEntries(
      Array.from(dayCounts).sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
    );

    // Only cache real results; caching an empty aggregate would pin a
    // "no data" dashboard for the whole TTL after a transient upstream problem.
    if (processedCount > 0) {
      cachedStats = {
        sourceBreakdown,
        timeSeries: sortedTimeSeries,
        lastUpdated: Date.now(),
      };
    }

    return NextResponse.json({
      sourceBreakdown,
      timeSeries: sortedTimeSeries,
      processedCount,
      cached: false,
      sampleSize: maxRows,
      note: processedCount < maxRows ? 'Full sample processed' : `Processed ${processedCount} rows (limited to avoid timeout)`,
    });
  } catch (error) {
    console.error('Error processing dataset:', error);
    return NextResponse.json(
      { error: 'Failed to process dataset', details: error instanceof Error ? error.message : 'Unknown error' },
      { status: 500 }
    );
  }
}
app/globals.css ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ * {
2
+ box-sizing: border-box;
3
+ padding: 0;
4
+ margin: 0;
5
+ }
6
+
7
+ html,
8
+ body {
9
+ max-width: 100vw;
10
+ overflow-x: hidden;
11
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
12
+ 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
13
+ sans-serif;
14
+ -webkit-font-smoothing: antialiased;
15
+ -moz-osx-font-smoothing: grayscale;
16
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
17
+ min-height: 100vh;
18
+ }
19
+
20
+ body {
21
+ color: #333;
22
+ }
23
+
24
+ a {
25
+ color: inherit;
26
+ text-decoration: none;
27
+ }
app/layout.tsx ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Metadata } from 'next'
2
+ import './globals.css'
3
+
4
+ export const metadata: Metadata = {
5
+ title: 'ShareLM Dataset Analysis',
6
+ description: 'Analysis dashboard for the ShareLM Hugging Face dataset',
7
+ }
8
+
9
/**
 * Root layout for the app: wraps every page in the `<html lang="en">` and
 * `<body>` shell.
 */
export default function RootLayout(props: { children: React.ReactNode }) {
  const { children: pageContent } = props

  return (
    <html lang="en">
      <body>{pageContent}</body>
    </html>
  )
}
app/page.module.css ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .mainContainer {
2
+ padding: 2rem;
3
+ max-width: 1400px;
4
+ margin: 0 auto;
5
+ }
6
+
7
+ .header {
8
+ text-align: center;
9
+ margin-bottom: 3rem;
10
+ color: white;
11
+ }
12
+
13
+ .header h1 {
14
+ font-size: 3rem;
15
+ font-weight: 700;
16
+ margin-bottom: 0.5rem;
17
+ text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.2);
18
+ }
19
+
20
+ .subtitle {
21
+ font-size: 1.2rem;
22
+ opacity: 0.9;
23
+ }
24
+
25
+
26
+ .errorBanner {
27
+ background: #fee;
28
+ border: 1px solid #fcc;
29
+ border-radius: 8px;
30
+ padding: 1rem;
31
+ margin-bottom: 2rem;
32
+ display: flex;
33
+ justify-content: space-between;
34
+ align-items: center;
35
+ }
36
+
37
+ .errorBanner button {
38
+ background: #667eea;
39
+ color: white;
40
+ border: none;
41
+ padding: 0.5rem 1rem;
42
+ border-radius: 4px;
43
+ cursor: pointer;
44
+ font-size: 0.9rem;
45
+ }
46
+
47
+ .errorBanner button:hover {
48
+ background: #5568d3;
49
+ }
50
+
51
+ .statsInfo {
52
+ background: white;
53
+ border-radius: 12px;
54
+ padding: 1.5rem;
55
+ box-shadow: 0 10px 30px rgba(0, 0, 0, 0.2);
56
+ display: flex;
57
+ justify-content: space-around;
58
+ flex-wrap: wrap;
59
+ gap: 1rem;
60
+ text-align: center;
61
+ }
62
+
63
+ .statsInfo p {
64
+ font-size: 1rem;
65
+ color: #333;
66
+ font-weight: 500;
67
+ }
68
+
app/page.tsx ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 'use client';
2
+
3
+ import { useEffect, useState } from 'react';
4
+ import { Charts } from '@/components/Charts';
5
+ import styles from './page.module.css';
6
+
7
+ interface SourceData {
8
+ name: string;
9
+ value: number;
10
+ }
11
+
12
+ interface TimeSeriesData {
13
+ date: string;
14
+ count: number;
15
+ }
16
+
17
/**
 * Dashboard page: loads aggregated statistics from `/api/stats` once on mount
 * and renders the charts, an error banner with a retry button, and a short
 * summary of the loaded data.
 */
export default function Home() {
  const [sourceData, setSourceData] = useState<SourceData[]>([]);
  const [timeSeriesData, setTimeSeriesData] = useState<TimeSeriesData[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    // Lets the cleanup cancel the request so no state is set after unmount
    // (or after React Strict Mode's extra mount/unmount cycle in development).
    const controller = new AbortController();

    async function fetchData() {
      try {
        setLoading(true);
        setError(null);

        // Fetch stats from API
        const response = await fetch('/api/stats', { signal: controller.signal });

        // The API reports failures as JSON ({ error, details }), so read the
        // body even for non-2xx responses; tolerate a non-JSON body.
        let data: any = null;
        try {
          data = await response.json();
        } catch {
          data = null;
        }

        if (!response.ok) {
          // statusText is frequently empty, so prefer the API's own explanation.
          const reason = data?.details || data?.error || response.statusText || `HTTP ${response.status}`;
          throw new Error(`Failed to fetch data: ${reason}`);
        }

        if (!data) {
          throw new Error('Failed to fetch data: response was not valid JSON');
        }

        if (data.error) {
          throw new Error(data.error);
        }

        // Format source breakdown data, largest source first
        const sourceBreakdown = Object.entries(data.sourceBreakdown || {}).map(([name, value]) => ({
          name,
          value: value as number,
        })).sort((a, b) => b.value - a.value);

        // Format time series data in chronological order
        const timeSeries = Object.entries(data.timeSeries || {}).map(([date, count]) => ({
          date,
          count: count as number,
        })).sort((a, b) => a.date.localeCompare(b.date));

        setSourceData(sourceBreakdown);
        setTimeSeriesData(timeSeries);
      } catch (err) {
        // An aborted request belongs to an unmounted instance; nothing to report.
        if (controller.signal.aborted) {
          return;
        }
        console.error('Error fetching data:', err);
        setError(err instanceof Error ? err.message : 'An unknown error occurred');
      } finally {
        if (!controller.signal.aborted) {
          setLoading(false);
        }
      }
    }

    fetchData();

    return () => controller.abort();
  }, []);

  return (
    <main className={styles.mainContainer}>
      <div className={styles.header}>
        <h1>ShareLM Dataset Analysis</h1>
        <p className={styles.subtitle}>Analyzing conversations from the ShareLM Hugging Face dataset</p>
      </div>

      {error && (
        <div className={styles.errorBanner}>
          <p>Error: {error}</p>
          <button onClick={() => window.location.reload()}>Retry</button>
        </div>
      )}

      <Charts
        sourceData={sourceData}
        timeSeriesData={timeSeriesData}
        loading={loading}
      />

      {!loading && !error && (
        <div className={styles.statsInfo}>
          <p>Total sources: {sourceData.length}</p>
          <p>Total time points: {timeSeriesData.length}</p>
          <p>Total conversations analyzed: {sourceData.reduce((sum, item) => sum + item.value, 0).toLocaleString()}</p>
        </div>
      )}
    </main>
  );
}
components/Charts.module.css ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .chartContainer {
2
+ background: white;
3
+ border-radius: 12px;
4
+ padding: 2rem;
5
+ box-shadow: 0 10px 30px rgba(0, 0, 0, 0.2);
6
+ }
7
+
8
+ .chartContainer h2 {
9
+ margin-bottom: 1.5rem;
10
+ color: #333;
11
+ font-size: 1.5rem;
12
+ text-align: center;
13
+ }
14
+
15
+ .loadingState,
16
+ .errorState {
17
+ text-align: center;
18
+ padding: 3rem;
19
+ color: #666;
20
+ font-size: 1.1rem;
21
+ }
22
+
23
+ .chartsWrapper {
24
+ display: grid;
25
+ grid-template-columns: 1fr;
26
+ gap: 2rem;
27
+ margin-bottom: 2rem;
28
+ }
29
+
30
+ @media (min-width: 768px) {
31
+ .chartsWrapper {
32
+ grid-template-columns: 1fr 1fr;
33
+ }
34
+ }
35
+
36
+ @media (min-width: 1200px) {
37
+ .chartsWrapper {
38
+ grid-template-columns: 1fr 1fr;
39
+ }
40
+ }
components/Charts.tsx ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 'use client';
2
+
3
+ import { PieChart, Pie, Cell, ResponsiveContainer, Tooltip, Legend } from 'recharts';
4
+ import { LineChart, Line, XAxis, YAxis, CartesianGrid, ResponsiveContainer as LineResponsiveContainer } from 'recharts';
5
+ import styles from './Charts.module.css';
6
+
7
+ interface SourceData {
8
+ name: string;
9
+ value: number;
10
+ }
11
+
12
+ interface ChartsProps {
13
+ sourceData: SourceData[];
14
+ timeSeriesData: Array<{ date: string; count: number }>;
15
+ loading: boolean;
16
+ }
17
+
18
+ const COLORS = ['#0088FE', '#00C49F', '#FFBB28', '#FF8042', '#8884d8', '#82ca9d', '#ffc658', '#ff7300'];
19
+
20
/**
 * Doughnut chart of conversation counts per source.
 *
 * Shows a loading message while `loading` is true and a "No data available"
 * message when `data` is missing or empty; otherwise renders one coloured
 * slice per entry, labelled with its name and percentage share.
 */
export function SourceDoughnutChart({ data, loading }: { data: SourceData[]; loading: boolean }) {
  // Decide up front whether a message replaces the chart.
  let notice: { className: string; text: string } | null = null;
  if (loading) {
    notice = { className: styles.loadingState, text: 'Loading source breakdown...' };
  } else if (!data || data.length === 0) {
    notice = { className: styles.errorState, text: 'No data available' };
  }

  if (notice !== null) {
    return (
      <div className={styles.chartContainer}>
        <div className={notice.className}>{notice.text}</div>
      </div>
    );
  }

  // One cell per slice; the palette repeats when there are more slices than colours.
  const slices = data.map((_, position) => (
    <Cell key={`cell-${position}`} fill={COLORS[position % COLORS.length]} />
  ));

  return (
    <div className={styles.chartContainer}>
      <h2>Source Breakdown</h2>
      <ResponsiveContainer width="100%" height={400}>
        <PieChart>
          <Pie
            data={data}
            dataKey="value"
            cx="50%"
            cy="50%"
            innerRadius={60}
            outerRadius={120}
            fill="#8884d8"
            labelLine={false}
            label={({ name, percent }) => `${name}: ${(percent * 100).toFixed(1)}%`}
          >
            {slices}
          </Pie>
          <Tooltip />
          <Legend />
        </PieChart>
      </ResponsiveContainer>
    </div>
  );
}
64
+
65
/**
 * Line chart of conversation counts per date.
 *
 * Shows a loading message while `loading` is true and a "No data available"
 * message when `data` is missing or empty. Series longer than 100 points are
 * thinned by keeping every k-th point, where k = ceil(length / 100).
 */
export function TimeSeriesChart({ data, loading }: { data: Array<{ date: string; count: number }>; loading: boolean }) {
  // Decide up front whether a message replaces the chart.
  let notice: { className: string; text: string } | null = null;
  if (loading) {
    notice = { className: styles.loadingState, text: 'Loading time series data...' };
  } else if (!data || data.length === 0) {
    notice = { className: styles.errorState, text: 'No data available' };
  }

  if (notice !== null) {
    return (
      <div className={styles.chartContainer}>
        <div className={notice.className}>{notice.text}</div>
      </div>
    );
  }

  // Thin out long series so the chart stays readable.
  let chartData = data;
  if (data.length > 100) {
    const stride = Math.ceil(data.length / 100);
    chartData = data.filter((_, position) => position % stride === 0);
  }

  return (
    <div className={styles.chartContainer}>
      <h2>Total Count Over Time</h2>
      <LineResponsiveContainer width="100%" height={400}>
        <LineChart data={chartData}>
          <CartesianGrid strokeDasharray="3 3" />
          <XAxis
            dataKey="date"
            interval="preserveStartEnd"
            height={100}
            angle={-45}
            textAnchor="end"
          />
          <YAxis />
          <Tooltip />
          <Line dataKey="count" type="monotone" stroke="#8884d8" strokeWidth={2} dot={{ r: 3 }} />
        </LineChart>
      </LineResponsiveContainer>
    </div>
  );
}
108
+
109
/**
 * Lays out the two dashboard charts inside the shared wrapper and passes the
 * same `loading` flag to both.
 */
export function Charts(props: ChartsProps) {
  const { sourceData, timeSeriesData, loading } = props;

  const doughnut = <SourceDoughnutChart data={sourceData} loading={loading} />;
  const timeline = <TimeSeriesChart data={timeSeriesData} loading={loading} />;

  return (
    <div className={styles.chartsWrapper}>
      {doughnut}
      {timeline}
    </div>
  );
}
next.config.js ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ /** @type {import('next').NextConfig} */
2
+ const nextConfig = {
3
+ experimental: {
4
+ serverActions: {
5
+ bodySizeLimit: '10mb',
6
+ },
7
+ },
8
+ }
9
+
10
+ module.exports = nextConfig
package.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "sharelm-analysis",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "scripts": {
6
+ "dev": "next dev",
7
+ "build": "next build",
8
+ "start": "next start",
9
+ "lint": "next lint"
10
+ },
11
+ "dependencies": {
12
+ "next": "^14.2.0",
13
+ "react": "^18.3.0",
14
+ "react-dom": "^18.3.0",
15
+ "recharts": "^2.12.0"
16
+ },
17
+ "devDependencies": {
18
+ "@types/node": "^20.11.0",
19
+ "@types/react": "^18.2.0",
20
+ "@types/react-dom": "^18.2.0",
21
+ "typescript": "^5.3.0",
22
+ "eslint": "^8.56.0",
23
+ "eslint-config-next": "^14.2.0"
24
+ }
25
+ }
tsconfig.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2020",
4
+ "lib": ["dom", "dom.iterable", "esnext"],
5
+ "allowJs": true,
6
+ "skipLibCheck": true,
7
+ "strict": true,
8
+ "noEmit": true,
9
+ "esModuleInterop": true,
10
+ "module": "esnext",
11
+ "moduleResolution": "bundler",
12
+ "resolveJsonModule": true,
13
+ "isolatedModules": true,
14
+ "jsx": "preserve",
15
+ "incremental": true,
16
+ "plugins": [
17
+ {
18
+ "name": "next"
19
+ }
20
+ ],
21
+ "paths": {
22
+ "@/*": ["./*"]
23
+ }
24
+ },
25
+ "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
26
+ "exclude": ["node_modules"]
27
+ }
vercel.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "functions": {
3
+ "app/api/**/*.ts": {
4
+ "maxDuration": 30
5
+ }
6
+ }
7
+ }