datamatters24 committed on
Commit
1e94321
·
verified ·
1 Parent(s): 0919819

Upload web/src/controllers/BrowseController.php with huggingface_hub

Browse files
web/src/controllers/BrowseController.php ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?php
2
+
3
+ declare(strict_types=1);
4
+
5
/**
 * Controller for the "browse" section: redirects the bare /browse route
 * and renders a paginated document listing for a single collection.
 */
class BrowseController
{
    /**
     * Sort key used when the request supplies none, or supplies a non-string value.
     */
    private const DEFAULT_SORT = 'processed_at';

    /**
     * Known collection slugs mapped to their display names.
     *
     * This single map is both the validation whitelist and the source of
     * display names, so a slug can no longer be accepted without a label.
     * NOTE(review): the 'doj_disclosures' label was missing from the original
     * name map; "DOJ Disclosures" is assumed — confirm the wording.
     */
    private const COLLECTION_NAMES = [
        'jfk_assassination' => 'JFK Assassination Records',
        'cia_declassified'  => 'CIA Declassified',
        'cia_stargate'      => 'CIA Stargate Program',
        'cia_mkultra'       => 'CIA MKUltra',
        'lincoln_archives'  => 'Lincoln Archives',
        'house_resolutions' => 'House Resolutions',
        'doj_disclosures'   => 'DOJ Disclosures',
        'nasa_apod'         => 'NASA APOD',
        'nasa_epic'         => 'NASA EPIC',
        'area51_cia'        => 'Area 51 / CIA Declassified',
        'court_records'     => 'Court Records',
        'foia'              => 'FOIA Releases',
        'house_oversight'   => 'House Oversight',
    ];

    /**
     * Handle /browse with no collection by redirecting to the home page.
     *
     * The home page is where all collections are shown, so this action
     * emits a 302 redirect to "/" and produces no body of its own.
     */
    public function index(): void
    {
        // /browse with no collection — redirect to home which shows all collections
        header('Location: /', true, 302);
    }

    /**
     * Display a paginated list of documents within a specific collection.
     *
     * Reads `page` and `sort` from the query string, loads the matching
     * documents, attaches each document's two highest-weighted topics under
     * the `topics` key, and renders views/browse.php. Unknown collections
     * get a minimal 404 page.
     *
     * The view is included with `require`, so it shares this method's local
     * scope; local variable names here are therefore part of the view's
     * contract and are kept stable.
     *
     * @param string $collection Collection slug taken from the route.
     */
    public function collection(string $collection): void
    {
        $collection = Security::sanitize($collection);

        // Validate the collection name against known sections
        $collectionNames = self::COLLECTION_NAMES;
        $validCollections = array_keys($collectionNames);

        if (!in_array($collection, $validCollections, true)) {
            http_response_code(404);
            echo '<!DOCTYPE html><html><head><title>Not Found</title></head>';
            echo '<body><h1>404 &mdash; Collection Not Found</h1></body></html>';
            return;
        }

        $config = require __DIR__ . '/../config.php';
        // Clamp to at least 1: a zero or non-numeric per_page would otherwise
        // cause a division by zero when computing $totalPages below.
        $perPage = max(1, (int) $config['app']['per_page']);

        // Pagination
        $page = max(1, (int) ($_GET['page'] ?? 1));
        // Cap the page so the offset multiplication cannot overflow into a float.
        $page = min($page, intdiv(PHP_INT_MAX, $perPage));
        $offset = ($page - 1) * $perPage;

        // Sort
        $sort = $_GET['sort'] ?? self::DEFAULT_SORT;
        if (!is_string($sort)) {
            // e.g. ?sort[]=x arrives as an array; fall back to the default key.
            $sort = self::DEFAULT_SORT;
        }
        // NOTE(review): $sort is request-controlled. Whether it is checked against
        // a whitelist before reaching SQL is not visible here — confirm in
        // Document::getByCollection().
        $sort = Security::sanitize($sort);

        $collectionName = $collectionNames[$collection] ?? ucwords(str_replace('_', ' ', $collection));

        $docModel = new Document();
        $documents = $docModel->getByCollection($collection, $perPage, $offset, $sort);
        $total = $docModel->countByCollection($collection);
        $totalPages = (int) ceil($total / $perPage);

        // Attach top topic to each document
        if (!empty($documents)) {
            // Give every document a 'topics' key up front so the view sees a
            // consistent shape even when the topic lookup below fails.
            foreach ($documents as &$doc) {
                $doc['topics'] = [];
            }
            unset($doc);

            $docIds = array_column($documents, 'id');

            // Skip the query when there are no ids: "IN ()" is invalid SQL.
            if ($docIds !== []) {
                $db = Database::getInstance();
                $placeholders = implode(',', array_fill(0, count($docIds), '?'));
                try {
                    $topicRows = $db->fetchAll(
                        "SELECT document_id, feature_json FROM document_features
                        WHERE document_id IN ($placeholders) AND feature_name = 'topic_distribution'",
                        $docIds
                    );
                    $topicMap = [];
                    foreach ($topicRows as $row) {
                        // A NULL column or a JSON scalar must not reach
                        // json_decode()/arsort(); treat both as "no topics".
                        $data = is_string($row['feature_json'])
                            ? json_decode($row['feature_json'], true)
                            : null;
                        if (!is_array($data)) {
                            $data = [];
                        }
                        // Highest weight first, keeping topic names as keys.
                        arsort($data);
                        $top = array_slice($data, 0, 2, true);
                        $topicMap[$row['document_id']] = $top;
                    }
                    foreach ($documents as &$doc) {
                        $doc['topics'] = $topicMap[$doc['id']] ?? [];
                    }
                    unset($doc);
                } catch (\PDOException $e) {
                    // Topics not available yet — documents keep their empty 'topics' default.
                }
            }
        }

        require __DIR__ . '/../views/browse.php';
    }
}