import axios from "axios";
import type {
InitRequest, InitResponse, DocumentRequest, AddDocResponse, BuildIndexResponse,
QueryRequest, QueryResponse, CompareRequest, CompareResponse,
KeywordAnalysisRequest, KeywordAnalysisResponse,
KeywordMatchRequest, MatchResponse, BatchAnalysisRequest,
CorpusStats, SimilarityDistribution, DisambiguationMetric, RetrievalMetric,
TrainResponse, TrainEvalResponse,
W2VInitResponse, W2VQueryResult, W2VSimilarWord,
DatasetInfo, DatasetLoadRequest, DatasetLoadResponse, DatasetPreviewResponse,
ContextAnalysisResponse,
} from "./types";
// The HuggingFace Spaces proxy requires the __sign token on every request.
// Read it once from the page URL; when present it becomes a default query
// parameter of the shared axios instance.
const _params = new URLSearchParams(window.location.search);
const _sign = _params.get("__sign");

const client = axios.create(
  _sign
    ? { baseURL: "/api", params: { __sign: _sign } }
    : { baseURL: "/api" },
);

// Per-request config for the slow endpoints (training, Word2Vec init,
// dataset load): a 10-minute timeout, expressed in milliseconds.
const long = { timeout: 10 * 60 * 1000 };
/**
 * Turn any thrown value into a message suitable for display.
 *
 * For Axios errors the checks run in this order: timeout code, missing
 * response, a string `detail` field in the response body, a plain-string
 * response body, and finally a generic message carrying the HTTP status.
 * Other `Error` instances yield their own `message`; anything else yields a
 * fixed fallback.
 *
 * @param err - The value caught from a failed call.
 * @returns A human-readable description of the failure.
 */
export function getErrorMessage(err: unknown): string {
  // Non-Axios values first: either a regular Error or something unrecognised.
  if (!axios.isAxiosError(err)) {
    return err instanceof Error ? err.message : "An unexpected error occurred.";
  }

  const { code, response } = err;
  if (code === "ECONNABORTED") {
    return "Request timed out. The server may be busy.";
  }
  if (!response) {
    // No response object at all: the request never reached a server.
    return "Cannot connect to server. Is it running? (uv run python server.py)";
  }

  const body = response.data;
  const detail = body?.detail;
  if (typeof detail === "string") {
    return detail;
  }
  return typeof body === "string" ? body : `Server error (${response.status})`;
}
/**
 * Probe the backend with a GET to `/stats` using a 5-second timeout.
 *
 * @returns `null` when the server answered (a 400 response also counts as
 *   reachable), otherwise the message produced by `getErrorMessage`.
 */
export async function checkConnection(): Promise<string | null> {
  try {
    await client.get("/stats", { timeout: 5000 });
  } catch (err) {
    // 400 = "Engine not initialized" — the server is up, there is just no engine yet.
    const serverUp = axios.isAxiosError(err) && err.response?.status === 400;
    return serverUp ? null : getErrorMessage(err);
  }
  return null;
}
/**
 * Payload fields common to the training endpoints called by
 * `trainUnsupervised`, `trainContrastive` and `trainKeywords` (the last one
 * adds `keyword_meanings` on top). Intended to mirror the server-side
 * `TrainRequest` model.
 */
interface TrainRequestData {
  // Model and data inputs.
  base_model: string;
  corpus_texts: string[];
  output_path: string;

  // Numeric training settings.
  // NOTE(review): presumably forwarded to the server unchanged — confirm against the server model.
  batch_size: number;
  epochs: number;
}
/**
 * Typed wrappers around the backend HTTP endpoints.
 *
 * Every method sends a single request through the shared `client` and
 * resolves with the response body (`r.data`). Nothing is caught here, so a
 * failed request rejects the returned promise with the axios error; callers
 * can pass that to `getErrorMessage`.
 *
 * Endpoints that receive the `long` config use its extended timeout.
 */
export const api = {
  // ---- Training ----

  trainUnsupervised: (data: TrainRequestData) =>
    client.post<TrainResponse>("/train/unsupervised", data, long).then(r => r.data),

  trainContrastive: (data: TrainRequestData) =>
    client.post<TrainResponse>("/train/contrastive", data, long).then(r => r.data),

  trainKeywords: (data: TrainRequestData & { keyword_meanings: Record<string, string> }) =>
    client.post<TrainResponse>("/train/keywords", data, long).then(r => r.data),

  trainEvaluate: (data: { test_pairs: { text_a: string; text_b: string; expected: number }[]; trained_model_path: string; base_model: string; corpus_texts: string[] }) =>
    client.post<TrainEvalResponse>("/train/evaluate", data).then(r => r.data),

  // ---- Engine ----

  init: (data: InitRequest) =>
    client.post<InitResponse>("/init", data).then(r => r.data),

  addDocument: (data: DocumentRequest) =>
    client.post<AddDocResponse>("/documents", data).then(r => r.data),

  buildIndex: () =>
    client.post<BuildIndexResponse>("/index/build").then(r => r.data),

  query: (data: QueryRequest) =>
    client.post<QueryResponse>("/query", data).then(r => r.data),

  compare: (data: CompareRequest) =>
    client.post<CompareResponse>("/compare", data).then(r => r.data),

  analyzeKeyword: (data: KeywordAnalysisRequest) =>
    client.post<KeywordAnalysisResponse>("/analyze/keyword", data).then(r => r.data),

  batchAnalyze: (data: BatchAnalysisRequest) =>
    client.post<Record<string, KeywordAnalysisResponse>>("/analyze/batch", data).then(r => r.data),

  matchKeyword: (data: KeywordMatchRequest) =>
    client.post<MatchResponse>("/match", data).then(r => r.data),

  analyzeContext: (data: { keyword: string; cluster_threshold?: number; top_words?: number }) =>
    client.post<ContextAnalysisResponse>("/analyze/context", data).then(r => r.data),

  similarWords: (data: { word: string; top_k: number }) =>
    client.post<{ word: string; similar: { word: string; score: number }[] }>("/analyze/similar-words", data).then(r => r.data),

  getStats: () =>
    client.get<CorpusStats>("/stats").then(r => r.data),

  // `cursor` is sent as a query parameter; the response carries a `cursor` field as well.
  pollLogs: (cursor: number = 0) =>
    client.get<{ lines: string[]; cursor: number }>(`/logs/poll?cursor=${cursor}`).then(r => r.data),

  getCorpusTexts: (maxDocs: number = 500) =>
    client.get<{ documents: { doc_id: string; text: string }[]; count: number }>(`/corpus/texts?max_docs=${maxDocs}`).then(r => r.data),

  // The document id is percent-encoded because it becomes a path segment.
  getDocument: (docId: string) =>
    client.get<{ doc_id: string; text: string; num_chunks: number }>(`/documents/${encodeURIComponent(docId)}`).then(r => r.data),

  // ---- Engine persistence ----

  saveEngine: () =>
    client.post<{ status: string; chunks: number; documents: number }>("/engine/save").then(r => r.data),

  hasSavedState: () =>
    client.get<{ exists: boolean }>("/engine/has-saved-state").then(r => r.data),

  // ---- Evaluation ----

  getSimilarityDistribution: () =>
    client.get<SimilarityDistribution>("/eval/similarity-distribution").then(r => r.data),

  evalDisambiguation: (data: { ground_truth: { keyword: string; text: string; true_meaning: string }[]; candidate_meanings: Record<string, string[]> }) =>
    client.post<{ metrics: DisambiguationMetric[] }>("/eval/disambiguation", data).then(r => r.data),

  evalRetrieval: (data: { queries: { query: string; relevant_doc_ids?: string[]; relevant_texts?: string[] }[]; k_values: number[] }) =>
    client.post<{ metrics: RetrievalMetric[] }>("/eval/retrieval", data).then(r => r.data),

  // ---- Word2Vec ----

  w2vInit: (data: { corpus_texts: string[]; vector_size: number; window: number; epochs: number }) =>
    client.post<W2VInitResponse>("/w2v/init", data, long).then(r => r.data),

  // Options travel as query parameters and the request body is null.
  // The __sign token is added to the per-request params explicitly.
  // NOTE(review): presumably so these params cannot displace the client-level
  // default — confirm against axios config merging.
  w2vInitFromEngine: (params?: { vector_size?: number; window?: number; epochs?: number }) =>
    client.post<W2VInitResponse & { documents_used: number }>(`/w2v/init-from-engine`, null, {
      ...long,
      params: { ...(_sign && { __sign: _sign }), ...params },
    }).then(r => r.data),

  w2vStatus: () =>
    client.get<{ ready: boolean; vocab_size?: number; sentences?: number; vector_size?: number; has_saved_state?: boolean }>("/w2v/status").then(r => r.data),

  w2vReset: () =>
    client.post<{ status: string; message: string }>("/w2v/reset").then(r => r.data),

  w2vCompare: (data: { text_a: string; text_b: string }) =>
    client.post<CompareResponse>("/w2v/compare", data).then(r => r.data),

  w2vQuery: (data: { text: string; top_k: number }) =>
    client.post<{ query: string; results: W2VQueryResult[] }>("/w2v/query", data).then(r => r.data),

  w2vSimilarWords: (data: { word: string; top_k: number }) =>
    client.post<{ word: string; similar: W2VSimilarWord[] }>("/w2v/similar-words", data).then(r => r.data),

  // ---- Dataset (HuggingFace) ----

  datasetInfo: () =>
    client.get<DatasetInfo>("/dataset/info").then(r => r.data),

  datasetLoad: (data: DatasetLoadRequest) =>
    client.post<DatasetLoadResponse>("/dataset/load", data, long).then(r => r.data),

  // `sourceFilter` is free text, so it is percent-encoded before being placed
  // in the query string; otherwise characters such as `&`, `=`, `#` or spaces
  // would truncate the value or inject extra parameters. An empty or omitted
  // filter adds no `source_filter` parameter at all.
  datasetPreview: (maxDocs: number = 10, sourceFilter?: string) => {
    const filterQuery = sourceFilter
      ? `&source_filter=${encodeURIComponent(sourceFilter)}`
      : "";
    return client
      .post<DatasetPreviewResponse>(`/dataset/preview?max_docs=${maxDocs}${filterQuery}`)
      .then(r => r.data);
  },
};
|