File size: 1,183 Bytes
db764ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import { useState } from "react";
import { api, getErrorMessage } from "../api";

/**
 * Separator between documents in the raw corpus text: a line break followed by
 * one or more blank lines. Blank lines may contain spaces/tabs, and both LF and
 * CRLF line endings are recognised.
 */
const CORPUS_DOCUMENT_SEPARATOR = /\r?\n(?:[ \t]*\r?\n)+/;

/** Trimmed fragments must be strictly longer than this to count as a document. */
const CORPUS_MIN_DOCUMENT_CHARS = 20;

/**
 * Shared hook for loading corpus text from the engine and parsing it into documents.
 * Used by both TrainingPanel and Word2VecPanel.
 *
 * State held by the hook:
 * - `corpusText`: the raw corpus as a single string (documents separated by blank lines).
 * - `loading`: `true` while `loadFromEngine` is awaiting the API.
 * - `error`: the last error message, or `""` when there is none.
 *
 * @returns The state values above, the `setCorpusText` / `setError` setters,
 *   and the `parseCorpus` / `loadFromEngine` helpers.
 */
export function useCorpusLoader() {
  const [corpusText, setCorpusText] = useState("");
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState("");

  /**
   * Splits the current `corpusText` into documents.
   *
   * Documents are delimited by one or more blank lines. Each fragment is
   * trimmed, and fragments of 20 characters or fewer are dropped.
   *
   * Note: because `loadFromEngine` joins engine documents with a blank line,
   * an engine document whose own text contains a blank line is returned here
   * as several documents.
   *
   * @returns The trimmed documents, in their original order.
   */
  function parseCorpus(): string[] {
    const documents: string[] = [];
    for (const fragment of corpusText.split(CORPUS_DOCUMENT_SEPARATOR)) {
      const trimmed = fragment.trim();
      if (trimmed.length > CORPUS_MIN_DOCUMENT_CHARS) {
        documents.push(trimmed);
      }
    }
    return documents;
  }

  /**
   * Fetches the corpus documents via `api.getCorpusTexts()` and stores their
   * texts, joined by blank lines, in `corpusText`.
   *
   * Clears any previous error before the request. If the response contains no
   * documents, an error message is set and `corpusText` is left unchanged.
   * Any thrown error is converted with `getErrorMessage` and stored in `error`
   * rather than rethrown. `loading` is reset to `false` in every case.
   */
  async function loadFromEngine(): Promise<void> {
    setLoading(true);
    setError("");
    try {
      const res = await api.getCorpusTexts();
      if (res.documents.length === 0) {
        setError("No documents loaded in the engine. Load a dataset first.");
        return;
      }
      const texts = res.documents.map(
        (d: { doc_id: string; text: string }) => d.text
      );
      setCorpusText(texts.join("\n\n"));
    } catch (e) {
      setError(getErrorMessage(e));
    } finally {
      // Runs on the early return above as well, so the spinner never sticks.
      setLoading(false);
    }
  }

  return {
    corpusText,
    setCorpusText,
    loading,
    error,
    setError,
    parseCorpus,
    loadFromEngine,
  };
}