File size: 7,536 Bytes
db764ae
 
 
 
 
 
 
 
 
 
 
 
 
f930251
 
 
 
 
 
 
 
 
db764ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e29b232
 
 
db764ae
 
 
9f87ec0
 
 
db764ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f87ec0
 
 
 
 
 
 
 
 
 
 
 
db764ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import axios from "axios";
import type {
  InitRequest, InitResponse, DocumentRequest, AddDocResponse, BuildIndexResponse,
  QueryRequest, QueryResponse, CompareRequest, CompareResponse,
  KeywordAnalysisRequest, KeywordAnalysisResponse,
  KeywordMatchRequest, MatchResponse, BatchAnalysisRequest,
  CorpusStats, SimilarityDistribution, DisambiguationMetric, RetrievalMetric,
  TrainResponse, TrainEvalResponse,
  W2VInitResponse, W2VQueryResult, W2VSimilarWord,
  DatasetInfo, DatasetLoadRequest, DatasetLoadResponse, DatasetPreviewResponse,
  ContextAnalysisResponse,
} from "./types";

// When served behind the HuggingFace Spaces proxy, every request has to carry
// the `__sign` token. It is read once from the page URL's query string and,
// when present, installed as a default query parameter on the shared client.
const _params = new URLSearchParams(window.location.search);
const _sign = _params.get("__sign");

// All endpoints live under "/api"; the token default is only added when the
// page URL actually supplied a non-empty one.
const client = _sign
  ? axios.create({ baseURL: "/api", params: { __sign: _sign } })
  : axios.create({ baseURL: "/api" });

// Per-request config for slow endpoints: a 10-minute timeout, in milliseconds.
const long = { timeout: 10 * 60 * 1000 };

/**
 * Extract a human-readable error message from an Axios error (or any thrown value).
 *
 * Resolution order for Axios errors:
 *  1. `ECONNABORTED` code             -> fixed "timed out" message.
 *  2. no `response` on the error      -> fixed "cannot connect" message.
 *  3. `response.data.detail` as a non-empty string -> returned as-is.
 *  4. `response.data.detail` as an array -> the string entries and the string
 *     `msg` fields of object entries, joined with "; ".
 *     NOTE(review): this is assumed to match the server's validation-error
 *     shape (FastAPI-style list of `{ msg }` objects) — confirm against the server.
 *  5. `response.data` as a non-empty string -> returned as-is.
 *  6. otherwise                       -> `Server error (<status>)`.
 *
 * Blank strings are skipped in steps 3-5 so that an empty response body never
 * produces an empty message.
 *
 * @param err - The value caught from a failed request or any other `catch`.
 * @returns A message that is safe to display; never throws.
 */
export function getErrorMessage(err: unknown): string {
  if (axios.isAxiosError(err)) {
    if (err.code === "ECONNABORTED") return "Request timed out. The server may be busy.";
    if (!err.response) return "Cannot connect to server. Is it running? (uv run python server.py)";

    const body: unknown = err.response.data;
    // Only object bodies can carry a `detail` field; strings/null yield undefined.
    const detail: unknown =
      typeof body === "object" && body !== null ? (body as { detail?: unknown }).detail : undefined;

    if (typeof detail === "string" && detail.trim() !== "") return detail;

    if (Array.isArray(detail)) {
      const messages = detail
        .map((item: unknown) =>
          typeof item === "object" && item !== null ? (item as { msg?: unknown }).msg : item,
        )
        .filter((msg): msg is string => typeof msg === "string" && msg.trim() !== "");
      if (messages.length > 0) return messages.join("; ");
    }

    if (typeof body === "string" && body.trim() !== "") return body;
    return `Server error (${err.response.status})`;
  }
  if (err instanceof Error) return err.message;
  return "An unexpected error occurred.";
}

/**
 * Probe the backend with a short-timeout GET on "/stats".
 *
 * @returns `null` when the server answered (including an HTTP 400 answer),
 *          otherwise the message produced by `getErrorMessage`.
 */
export async function checkConnection(): Promise<string | null> {
  try {
    await client.get("/stats", { timeout: 5000 });
  } catch (failure) {
    const status = axios.isAxiosError(failure) ? failure.response?.status : undefined;
    // 400 = "Engine not initialized" — the server is up, there is just no engine yet.
    return status === 400 ? null : getErrorMessage(failure);
  }
  return null;
}

/**
 * Shared shape for all training requests (matches server TrainRequest).
 *
 * Used as the payload type of `api.trainUnsupervised` and
 * `api.trainContrastive`, and (intersected with `keyword_meanings`) of
 * `api.trainKeywords`. The object is passed unchanged as the POST body,
 * which is why the field names are snake_case.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * only — confirm against the server-side model.
 */
interface TrainRequestData {
  // Presumably the raw texts to train on.
  corpus_texts: string[];
  // Presumably the identifier of the model to start from.
  base_model: string;
  // Presumably where the server stores the trained model.
  output_path: string;
  // Presumably the number of training epochs.
  epochs: number;
  // Presumably the training batch size.
  batch_size: number;
}

/**
 * Typed wrappers around the backend HTTP endpoints.
 *
 * Every method issues one request through the shared `client` and resolves to
 * the response body (`r.data`); a failed request rejects with the underlying
 * Axios error. Methods that pass `long` use the extended request timeout.
 */
export const api = {
  // ---- Training ----
  /** POST /train/unsupervised (long timeout). */
  trainUnsupervised: (data: TrainRequestData) =>
    client.post<TrainResponse>("/train/unsupervised", data, long).then(r => r.data),

  /** POST /train/contrastive (long timeout). */
  trainContrastive: (data: TrainRequestData) =>
    client.post<TrainResponse>("/train/contrastive", data, long).then(r => r.data),

  /** POST /train/keywords (long timeout); the payload additionally carries `keyword_meanings`. */
  trainKeywords: (data: TrainRequestData & { keyword_meanings: Record<string, string> }) =>
    client.post<TrainResponse>("/train/keywords", data, long).then(r => r.data),

  /** POST /train/evaluate with the test pairs and the model references. */
  trainEvaluate: (data: { test_pairs: { text_a: string; text_b: string; expected: number }[]; trained_model_path: string; base_model: string; corpus_texts: string[] }) =>
    client.post<TrainEvalResponse>("/train/evaluate", data).then(r => r.data),

  // ---- Engine ----
  /** POST /init. */
  init: (data: InitRequest) =>
    client.post<InitResponse>("/init", data).then(r => r.data),

  /** POST /documents. */
  addDocument: (data: DocumentRequest) =>
    client.post<AddDocResponse>("/documents", data).then(r => r.data),

  /** POST /index/build (no request body). */
  buildIndex: () =>
    client.post<BuildIndexResponse>("/index/build").then(r => r.data),

  /** POST /query. */
  query: (data: QueryRequest) =>
    client.post<QueryResponse>("/query", data).then(r => r.data),

  /** POST /compare. */
  compare: (data: CompareRequest) =>
    client.post<CompareResponse>("/compare", data).then(r => r.data),

  /** POST /analyze/keyword. */
  analyzeKeyword: (data: KeywordAnalysisRequest) =>
    client.post<KeywordAnalysisResponse>("/analyze/keyword", data).then(r => r.data),

  /** POST /analyze/batch; resolves to a map of string keys to keyword analyses. */
  batchAnalyze: (data: BatchAnalysisRequest) =>
    client.post<Record<string, KeywordAnalysisResponse>>("/analyze/batch", data).then(r => r.data),

  /** POST /match. */
  matchKeyword: (data: KeywordMatchRequest) =>
    client.post<MatchResponse>("/match", data).then(r => r.data),

  /** POST /analyze/context. */
  analyzeContext: (data: { keyword: string; cluster_threshold?: number; top_words?: number }) =>
    client.post<ContextAnalysisResponse>("/analyze/context", data).then(r => r.data),

  /** POST /analyze/similar-words. */
  similarWords: (data: { word: string; top_k: number }) =>
    client.post<{ word: string; similar: { word: string; score: number }[] }>("/analyze/similar-words", data).then(r => r.data),

  /** GET /stats. */
  getStats: () =>
    client.get<CorpusStats>("/stats").then(r => r.data),

  /** GET /logs/poll, passing `cursor` (default 0) as a query parameter. */
  pollLogs: (cursor: number = 0) =>
    client.get<{ lines: string[]; cursor: number }>(`/logs/poll?cursor=${cursor}`).then(r => r.data),

  /** GET /corpus/texts, passing `maxDocs` (default 500) as `max_docs`. */
  getCorpusTexts: (maxDocs: number = 500) =>
    client.get<{ documents: { doc_id: string; text: string }[]; count: number }>(`/corpus/texts?max_docs=${maxDocs}`).then(r => r.data),

  /** GET /documents/{docId}; the id is percent-encoded before being placed in the path. */
  getDocument: (docId: string) =>
    client.get<{ doc_id: string; text: string; num_chunks: number }>(`/documents/${encodeURIComponent(docId)}`).then(r => r.data),

  // ---- Engine persistence ----
  /** POST /engine/save (no request body). */
  saveEngine: () =>
    client.post<{ status: string; chunks: number; documents: number }>("/engine/save").then(r => r.data),

  /** GET /engine/has-saved-state. */
  hasSavedState: () =>
    client.get<{ exists: boolean }>("/engine/has-saved-state").then(r => r.data),

  // ---- Evaluation ----
  /** GET /eval/similarity-distribution. */
  getSimilarityDistribution: () =>
    client.get<SimilarityDistribution>("/eval/similarity-distribution").then(r => r.data),

  /** POST /eval/disambiguation. */
  evalDisambiguation: (data: { ground_truth: { keyword: string; text: string; true_meaning: string }[]; candidate_meanings: Record<string, string[]> }) =>
    client.post<{ metrics: DisambiguationMetric[] }>("/eval/disambiguation", data).then(r => r.data),

  /** POST /eval/retrieval. */
  evalRetrieval: (data: { queries: { query: string; relevant_doc_ids?: string[]; relevant_texts?: string[] }[]; k_values: number[] }) =>
    client.post<{ metrics: RetrievalMetric[] }>("/eval/retrieval", data).then(r => r.data),

  // ---- Word2Vec ----
  /** POST /w2v/init (long timeout). */
  w2vInit: (data: { corpus_texts: string[]; vector_size: number; window: number; epochs: number }) =>
    client.post<W2VInitResponse>("/w2v/init", data, long).then(r => r.data),

  /**
   * POST /w2v/init-from-engine (long timeout) with a null body; the optional
   * settings travel as query parameters.
   */
  w2vInitFromEngine: (params?: { vector_size?: number; window?: number; epochs?: number }) =>
    client.post<W2VInitResponse & { documents_used: number }>(`/w2v/init-from-engine`, null, {
      ...long,
      // `__sign` is re-attached here explicitly — presumably so that supplying
      // per-request `params` cannot drop the client-level default token.
      params: { ...(_sign && { __sign: _sign }), ...params },
    }).then(r => r.data),

  /** GET /w2v/status. */
  w2vStatus: () =>
    client.get<{ ready: boolean; vocab_size?: number; sentences?: number; vector_size?: number; has_saved_state?: boolean }>("/w2v/status").then(r => r.data),

  /** POST /w2v/reset (no request body). */
  w2vReset: () =>
    client.post<{ status: string; message: string }>("/w2v/reset").then(r => r.data),

  /** POST /w2v/compare. */
  w2vCompare: (data: { text_a: string; text_b: string }) =>
    client.post<CompareResponse>("/w2v/compare", data).then(r => r.data),

  /** POST /w2v/query. */
  w2vQuery: (data: { text: string; top_k: number }) =>
    client.post<{ query: string; results: W2VQueryResult[] }>("/w2v/query", data).then(r => r.data),

  /** POST /w2v/similar-words. */
  w2vSimilarWords: (data: { word: string; top_k: number }) =>
    client.post<{ word: string; similar: W2VSimilarWord[] }>("/w2v/similar-words", data).then(r => r.data),

  // ---- Dataset (HuggingFace) ----
  /** GET /dataset/info. */
  datasetInfo: () =>
    client.get<DatasetInfo>("/dataset/info").then(r => r.data),

  /** POST /dataset/load (long timeout). */
  datasetLoad: (data: DatasetLoadRequest) =>
    client.post<DatasetLoadResponse>("/dataset/load", data, long).then(r => r.data),

  /**
   * POST /dataset/preview, passing `maxDocs` (default 10) as `max_docs` and,
   * when a non-empty `sourceFilter` is given, `source_filter`.
   */
  datasetPreview: (maxDocs: number = 10, sourceFilter?: string) => {
    // Percent-encode the filter so characters such as `&`, `#`, `=` or spaces
    // cannot truncate the value or inject extra query parameters.
    const filterPart = sourceFilter ? `&source_filter=${encodeURIComponent(sourceFilter)}` : "";
    return client.post<DatasetPreviewResponse>(`/dataset/preview?max_docs=${maxDocs}${filterPart}`).then(r => r.data);
  },
};