File size: 2,290 Bytes
82549b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python3
"""Smoke-test Hugging Face Inference API (embeddings + chat) using your .env.

Run from the repository root (so `.env` is found):

  LLM_PROVIDER=huggingface uv run python scripts/verify_huggingface_inference.py

Or set `LLM_PROVIDER=huggingface` in `.env` together with `HUGGINGFACE_API_KEY`, then:

  uv run python scripts/verify_huggingface_inference.py
"""

from __future__ import annotations

import os
import sys
from pathlib import Path

# Ensure repo root is cwd so pydantic-settings loads `.env`.
# The root is taken to be the directory two levels above this file, i.e. the
# parent of the directory that contains this script (the usage in the module
# docstring runs it as `scripts/verify_huggingface_inference.py`).
_ROOT = Path(__file__).resolve().parent.parent
os.chdir(_ROOT)
# Prepend the root to sys.path (once) -- presumably so the `api` and `rag`
# imports below resolve when the script is launched by file path.  This must
# stay above those imports.
if str(_ROOT) not in sys.path:
    sys.path.insert(0, str(_ROOT))

from langchain_core.messages import HumanMessage

from api.config import get_settings
from rag.embedder import create_embedding_function
from rag.retriever import _create_chat_model


def _content_to_text(content: object) -> str:
    """Flatten a chat message's ``content`` value into plain text.

    String content is returned unchanged.  List content (content blocks) is
    reduced to the concatenation of its string items and of the ``"text"``
    value of its dict items; any other item is skipped.  ``None`` yields an
    empty string, and any other value is passed through ``str()``.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts: list[str] = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and isinstance(block.get("text"), str):
                parts.append(block["text"])
        return "".join(parts)
    return str(content)


def main() -> int:
    """Run the Hugging Face smoke test and return a process exit code.

    Steps:
      1. Reload settings and check that the provider is ``huggingface`` and
         that a non-blank API token is configured.
      2. Embed one short query and report the vector length.
      3. Send one chat message and print the first 200 characters of the reply.

    Returns:
        0 when both calls complete; 1 when the provider or token is not
        configured, or when the embedding call returns an empty vector.

    Exceptions raised by the embedding or chat calls are not caught here, so
    they propagate to the caller with their traceback.
    """
    # get_settings exposes cache_clear(); drop any cached value so the
    # settings are rebuilt after the module-level chdir to the repo root.
    get_settings.cache_clear()
    settings = get_settings()

    if settings.llm_provider.lower() != "huggingface":
        print(
            "Set LLM_PROVIDER=huggingface in your environment or `.env` before running this script.\n"
            f"Current LLM_PROVIDER={settings.llm_provider!r}",
            file=sys.stderr,
        )
        return 1

    # Treat a missing, empty, or whitespace-only token as "not configured".
    if not (settings.huggingface_api_key or "").strip():
        print(
            "Missing token: set HUGGINGFACE_API_KEY or HF_TOKEN in `.env`.\n"
            "Current LLM_PROVIDER is huggingface but no Hugging Face token resolved.",
            file=sys.stderr,
        )
        return 1

    print("Model:", settings.huggingface_model)
    print("Embedding model:", settings.huggingface_embedding_model)
    print("--- Embeddings ---")
    emb = create_embedding_function()
    vec = emb.embed_query("hello from DocuAudit local smoke test")
    dim = len(vec)
    if dim == 0:
        # A zero-length vector is not a usable embedding; do not report OK.
        print(
            "Embedding call returned an empty vector; the embedding model is not usable.",
            file=sys.stderr,
        )
        return 1
    print(f"OK: single query embedding length = {dim}")

    print("--- Chat (Inference API) ---")
    llm = _create_chat_model(settings)
    msg = HumanMessage(content='Reply with exactly the word "ok" and nothing else.')
    out = llm.invoke([msg])
    # Message content may be a plain string or a list of content blocks;
    # flatten it first, and fall back to str(out) when no text is available.
    text = (_content_to_text(getattr(out, "content", None)) or str(out)).strip()
    print("OK: chat response (first 200 chars):", text[:200])

    print("\nHugging Face path looks good for deployment smoke checks.")
    return 0


# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())