import axios from "axios";
import type {
  InitRequest,
  InitResponse,
  DocumentRequest,
  AddDocResponse,
  BuildIndexResponse,
  QueryRequest,
  QueryResponse,
  CompareRequest,
  CompareResponse,
  KeywordAnalysisRequest,
  KeywordAnalysisResponse,
  KeywordMatchRequest,
  MatchResponse,
  BatchAnalysisRequest,
  CorpusStats,
  SimilarityDistribution,
  DisambiguationMetric,
  RetrievalMetric,
  TrainResponse,
  TrainEvalResponse,
  W2VInitResponse,
  W2VQueryResult,
  W2VSimilarWord,
  DatasetInfo,
  DatasetLoadRequest,
  DatasetLoadResponse,
  DatasetPreviewResponse,
  ContextAnalysisResponse,
} from "./types";

// HuggingFace Spaces proxy requires the __sign token on every request.
// Extract it from the page URL and attach to all API calls.
// The `typeof window` guard keeps the module importable where no `window`
// global exists (e.g. server-side rendering or Node-based unit tests).
const _params = new URLSearchParams(
  typeof window === "undefined" ? "" : window.location.search,
);
const _sign = _params.get("__sign");

/** Shared axios instance: all paths below are relative to `/api`. */
const client = axios.create({
  baseURL: "/api",
  ...(_sign && { params: { __sign: _sign } }),
});

/** Request options for long-running calls: 600000 ms (10 minute) timeout. */
const long = { timeout: 600000 };

/**
 * Extract a human-readable error message from an Axios error.
 *
 * @param err - Any thrown value (typically from a rejected `api.*` call).
 * @returns The first applicable message, in order: a timeout notice, a
 *   "cannot connect" notice when no response was received, the response
 *   body's string `detail` field, the response body itself when it is a
 *   string, `Server error (<status>)`, the `Error.message` of a non-Axios
 *   error, or a generic fallback.
 */
export function getErrorMessage(err: unknown): string {
  if (axios.isAxiosError(err)) {
    // axios reports timeouts as ECONNABORTED by default and as ETIMEDOUT when
    // `transitional.clarifyTimeoutError` is enabled; treat both the same way.
    if (err.code === "ECONNABORTED" || err.code === "ETIMEDOUT") {
      return "Request timed out. The server may be busy.";
    }
    if (!err.response) {
      return "Cannot connect to server. Is it running? (uv run python server.py)";
    }
    const detail = err.response.data?.detail;
    if (typeof detail === "string") return detail;
    if (typeof err.response.data === "string") return err.response.data;
    return `Server error (${err.response.status})`;
  }
  if (err instanceof Error) return err.message;
  return "An unexpected error occurred.";
}

/**
 * Check if the backend is reachable by requesting `/stats` with a 5 second
 * timeout.
 *
 * @returns `null` on success (including an HTTP 400 reply, which still proves
 *   the server is up), otherwise an error message from {@link getErrorMessage}.
 */
export async function checkConnection(): Promise<string | null> {
  try {
    await client.get("/stats", { timeout: 5000 });
    return null;
  } catch (err) {
    if (axios.isAxiosError(err) && err.response?.status === 400) {
      // 400 = "Engine not initialized" — server is up, just no engine yet
      return null;
    }
    return getErrorMessage(err);
  }
}

/** Shared shape for all training requests (matches server TrainRequest). */
interface TrainRequestData {
  corpus_texts: string[];
  base_model: string;
  output_path: string;
  epochs: number;
  batch_size: number;
}

/**
 * Thin typed wrappers around the backend HTTP endpoints. Every method issues
 * one request through the shared `client` and resolves to the response body
 * (`r.data`); failures reject with the underlying axios error.
 *
 * NOTE(review): the response type arguments on calls that use an imported
 * `./types` name were missing from this file and have been restored by
 * matching each imported type to its endpoint — confirm against the server
 * schema.
 */
export const api = {
  // ---- Training ----
  // Training calls use the extended `long` timeout.
  trainUnsupervised: (data: TrainRequestData) =>
    client.post<TrainResponse>("/train/unsupervised", data, long).then(r => r.data),

  trainContrastive: (data: TrainRequestData) =>
    client.post<TrainResponse>("/train/contrastive", data, long).then(r => r.data),

  // TODO(review): the value type of `keyword_meanings` is not visible here;
  // tighten `unknown` to the server's actual schema.
  trainKeywords: (data: TrainRequestData & { keyword_meanings: Record<string, unknown> }) =>
    client.post<TrainResponse>("/train/keywords", data, long).then(r => r.data),

  trainEvaluate: (data: {
    test_pairs: { text_a: string; text_b: string; expected: number }[];
    trained_model_path: string;
    base_model: string;
    corpus_texts: string[];
  }) => client.post<TrainEvalResponse>("/train/evaluate", data).then(r => r.data),

  // ---- Engine ----
  init: (data: InitRequest) =>
    client.post<InitResponse>("/init", data).then(r => r.data),

  addDocument: (data: DocumentRequest) =>
    client.post<AddDocResponse>("/documents", data).then(r => r.data),

  buildIndex: () =>
    client.post<BuildIndexResponse>("/index/build").then(r => r.data),

  query: (data: QueryRequest) =>
    client.post<QueryResponse>("/query", data).then(r => r.data),

  compare: (data: CompareRequest) =>
    client.post<CompareResponse>("/compare", data).then(r => r.data),

  analyzeKeyword: (data: KeywordAnalysisRequest) =>
    client.post<KeywordAnalysisResponse>("/analyze/keyword", data).then(r => r.data),

  // NOTE(review): response presumably maps each keyword to its analysis —
  // confirm the exact shape against the server.
  batchAnalyze: (data: BatchAnalysisRequest) =>
    client
      .post<Record<string, KeywordAnalysisResponse>>("/analyze/batch", data)
      .then(r => r.data),

  matchKeyword: (data: KeywordMatchRequest) =>
    client.post<MatchResponse>("/match", data).then(r => r.data),

  analyzeContext: (data: { keyword: string; cluster_threshold?: number; top_words?: number }) =>
    client.post<ContextAnalysisResponse>("/analyze/context", data).then(r => r.data),

  similarWords: (data: { word: string; top_k: number }) =>
    client
      .post<{ word: string; similar: { word: string; score: number }[] }>(
        "/analyze/similar-words",
        data,
      )
      .then(r => r.data),

  getStats: () => client.get<CorpusStats>("/stats").then(r => r.data),

  pollLogs: (cursor = 0) =>
    client
      .get<{ lines: string[]; cursor: number }>(`/logs/poll?cursor=${cursor}`)
      .then(r => r.data),

  getCorpusTexts: (maxDocs = 500) =>
    client
      .get<{ documents: { doc_id: string; text: string }[]; count: number }>(
        `/corpus/texts?max_docs=${maxDocs}`,
      )
      .then(r => r.data),

  getDocument: (docId: string) =>
    client
      .get<{ doc_id: string; text: string; num_chunks: number }>(
        `/documents/${encodeURIComponent(docId)}`,
      )
      .then(r => r.data),

  // ---- Engine persistence ----
  saveEngine: () =>
    client
      .post<{ status: string; chunks: number; documents: number }>("/engine/save")
      .then(r => r.data),

  hasSavedState: () =>
    client.get<{ exists: boolean }>("/engine/has-saved-state").then(r => r.data),

  // ---- Evaluation ----
  getSimilarityDistribution: () =>
    client.get<SimilarityDistribution>("/eval/similarity-distribution").then(r => r.data),

  // TODO(review): the value type of `candidate_meanings` is not visible here;
  // tighten `unknown` to the server's actual schema.
  evalDisambiguation: (data: {
    ground_truth: { keyword: string; text: string; true_meaning: string }[];
    candidate_meanings: Record<string, unknown>;
  }) =>
    client
      .post<{ metrics: DisambiguationMetric[] }>("/eval/disambiguation", data)
      .then(r => r.data),

  evalRetrieval: (data: {
    queries: { query: string; relevant_doc_ids?: string[]; relevant_texts?: string[] }[];
    k_values: number[];
  }) =>
    client.post<{ metrics: RetrievalMetric[] }>("/eval/retrieval", data).then(r => r.data),

  // ---- Word2Vec ----
  w2vInit: (data: { corpus_texts: string[]; vector_size: number; window: number; epochs: number }) =>
    client.post<W2VInitResponse>("/w2v/init", data, long).then(r => r.data),

  // Sends no body; the options travel as query parameters. `__sign` is added
  // again here next to the caller's params — presumably because request-level
  // `params` can replace the instance-level ones in some axios versions.
  w2vInitFromEngine: (params?: { vector_size?: number; window?: number; epochs?: number }) =>
    client
      .post<W2VInitResponse>(`/w2v/init-from-engine`, null, {
        ...long,
        params: { ...(_sign && { __sign: _sign }), ...params },
      })
      .then(r => r.data),

  w2vStatus: () =>
    client
      .get<{
        ready: boolean;
        vocab_size?: number;
        sentences?: number;
        vector_size?: number;
        has_saved_state?: boolean;
      }>("/w2v/status")
      .then(r => r.data),

  w2vReset: () =>
    client.post<{ status: string; message: string }>("/w2v/reset").then(r => r.data),

  // NOTE(review): assumed to share the response shape of `/compare` — confirm.
  w2vCompare: (data: { text_a: string; text_b: string }) =>
    client.post<CompareResponse>("/w2v/compare", data).then(r => r.data),

  w2vQuery: (data: { text: string; top_k: number }) =>
    client
      .post<{ query: string; results: W2VQueryResult[] }>("/w2v/query", data)
      .then(r => r.data),

  w2vSimilarWords: (data: { word: string; top_k: number }) =>
    client
      .post<{ word: string; similar: W2VSimilarWord[] }>("/w2v/similar-words", data)
      .then(r => r.data),

  // ---- Dataset (HuggingFace) ----
  datasetInfo: () => client.get<DatasetInfo>("/dataset/info").then(r => r.data),

  datasetLoad: (data: DatasetLoadRequest) =>
    client.post<DatasetLoadResponse>("/dataset/load", data, long).then(r => r.data),

  // `sourceFilter` is percent-encoded so characters such as `&`, `#`, `=` or
  // spaces cannot corrupt the query string; an empty filter is omitted.
  datasetPreview: (maxDocs = 10, sourceFilter?: string) =>
    client
      .post<DatasetPreviewResponse>(
        `/dataset/preview?max_docs=${maxDocs}${
          sourceFilter ? `&source_filter=${encodeURIComponent(sourceFilter)}` : ""
        }`,
      )
      .then(r => r.data),
};