Clean up token env vars; add Gemini to free provider menu
Browse files
- HFInferenceClient: drop hf_soci_token / soci_token / HW_WR_TOKEN —
HF_TOKEN is the only supported var (auto-injected by HF Spaces)
- create_llm_client: same — only HF_TOKEN triggers HF auto-detection
- _choose_provider: show Groq + Gemini (both free-tier) before HF and Ollama;
remove has_claude (Claude never appeared in the menu anyway)
- HF now labelled "requires HF PRO / credits" to set correct expectations
- SOCI_PROVIDER / LLM_PROVIDER env var still overrides everything (can force
any provider including claude)
Tokens safe to delete from HF settings: soci_token, soci_read, hf_soci_token
SOCI_PROVIDER is not a secret — safe to use as a public Space variable
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- src/soci/api/server.py +8 -12
- src/soci/engine/llm.py +9 -22
src/soci/api/server.py
CHANGED
|
@@ -253,23 +253,19 @@ def _choose_provider() -> str:
|
|
| 253 |
if provider in ("claude", "groq", "gemini", "hf", "ollama"):
|
| 254 |
return provider
|
| 255 |
|
| 256 |
-
# Check
|
| 257 |
-
has_claude = bool(os.environ.get("ANTHROPIC_API_KEY"))
|
| 258 |
has_groq = bool(os.environ.get("GROQ_API_KEY"))
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
or os.environ.get("hf_soci_token")
|
| 262 |
-
or os.environ.get("soci_token")
|
| 263 |
-
or os.environ.get("HW_WR_TOKEN")
|
| 264 |
-
)
|
| 265 |
|
| 266 |
-
#
|
| 267 |
-
# forced via SOCI_PROVIDER / LLM_PROVIDER env var if needed.
|
| 268 |
options = []
|
| 269 |
-
if has_hf:
|
| 270 |
-
options.append(("hf", "HF Inference (free, SmolLM3 via hf-inference)"))
|
| 271 |
if has_groq:
|
| 272 |
options.append(("groq", "Groq (free tier, 30 req/min)"))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
options.append(("ollama", "Ollama (local, free, no rate limit)"))
|
| 274 |
|
| 275 |
# If only one option, use it
|
|
|
|
| 253 |
if provider in ("claude", "groq", "gemini", "hf", "ollama"):
|
| 254 |
return provider
|
| 255 |
|
| 256 |
+
# Check which keys are available
|
|
|
|
| 257 |
has_groq = bool(os.environ.get("GROQ_API_KEY"))
|
| 258 |
+
has_gemini = bool(os.environ.get("GEMINI_API_KEY"))
|
| 259 |
+
has_hf = bool(os.environ.get("HF_TOKEN"))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
+
# Free providers only; Claude can be forced via SOCI_PROVIDER / LLM_PROVIDER.
|
|
|
|
| 262 |
options = []
|
|
|
|
|
|
|
| 263 |
if has_groq:
|
| 264 |
options.append(("groq", "Groq (free tier, 30 req/min)"))
|
| 265 |
+
if has_gemini:
|
| 266 |
+
options.append(("gemini", "Gemini (free tier, 15 req/min via AI Studio)"))
|
| 267 |
+
if has_hf:
|
| 268 |
+
options.append(("hf", "HF Inference (requires HF PRO / credits)"))
|
| 269 |
options.append(("ollama", "Ollama (local, free, no rate limit)"))
|
| 270 |
|
| 271 |
# If only one option, use it
|
src/soci/engine/llm.py
CHANGED
|
@@ -813,12 +813,11 @@ class GeminiClient:
|
|
| 813 |
# ============================================================
|
| 814 |
|
| 815 |
class HFInferenceClient:
|
| 816 |
-
"""Hugging Face Serverless Inference via
|
| 817 |
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
- Get a token at https://huggingface.co/settings/tokens
|
| 822 |
"""
|
| 823 |
|
| 824 |
def __init__(
|
|
@@ -827,20 +826,10 @@ class HFInferenceClient:
|
|
| 827 |
default_model: str = MODEL_HF_SMOL,
|
| 828 |
max_retries: int = 3,
|
| 829 |
) -> None:
|
| 830 |
-
|
| 831 |
-
# HF_TOKEN is auto-injected in HF Spaces but only has basic inference (no credits for routed models).
|
| 832 |
-
# A personal token stored as hf_soci_token / soci_token / HW_WR_TOKEN takes precedence.
|
| 833 |
-
self.api_key = (
|
| 834 |
-
api_key
|
| 835 |
-
or os.environ.get("hf_soci_token", "")
|
| 836 |
-
or os.environ.get("soci_token", "")
|
| 837 |
-
or os.environ.get("HW_WR_TOKEN", "")
|
| 838 |
-
or os.environ.get("HF_TOKEN", "")
|
| 839 |
-
)
|
| 840 |
if not self.api_key:
|
| 841 |
logger.warning(
|
| 842 |
-
"
|
| 843 |
-
"Get a free token at https://huggingface.co/settings/tokens"
|
| 844 |
)
|
| 845 |
self.default_model = default_model
|
| 846 |
self.max_retries = max_retries
|
|
@@ -888,7 +877,7 @@ class HFInferenceClient:
|
|
| 888 |
max_tokens: int = 1024,
|
| 889 |
) -> str:
|
| 890 |
if not self.api_key:
|
| 891 |
-
self._last_error = "HF_TOKEN
|
| 892 |
return ""
|
| 893 |
if self._is_quota_exhausted():
|
| 894 |
logger.debug("HF quota circuit breaker active β skipping complete()")
|
|
@@ -939,12 +928,11 @@ class HFInferenceClient:
|
|
| 939 |
elif status in (401, 402, 403, 410):
|
| 940 |
# Auth/payment failure β circuit-break for 1h to stop spam retries.
|
| 941 |
# 402 means no credits (token lacks Inference Providers permission).
|
| 942 |
-
# Fix: add hf_soci_token secret in Space with a token that has inference perms.
|
| 943 |
self._rate_limited_until = time.monotonic() + 3600
|
| 944 |
self._auth_error = body
|
| 945 |
logger.error(
|
| 946 |
f"HF auth error ({status}): {body[:120]} β "
|
| 947 |
-
"
|
| 948 |
)
|
| 949 |
return ""
|
| 950 |
elif status in (503, 504):
|
|
@@ -1033,8 +1021,7 @@ def create_llm_client(
|
|
| 1033 |
provider = PROVIDER_GROQ
|
| 1034 |
elif os.environ.get("GEMINI_API_KEY"):
|
| 1035 |
provider = PROVIDER_GEMINI
|
| 1036 |
-
elif
|
| 1037 |
-
or os.environ.get("soci_token") or os.environ.get("HW_WR_TOKEN")):
|
| 1038 |
provider = PROVIDER_HF
|
| 1039 |
else:
|
| 1040 |
provider = PROVIDER_OLLAMA
|
|
|
|
| 813 |
# ============================================================
|
| 814 |
|
| 815 |
class HFInferenceClient:
|
| 816 |
+
"""Hugging Face Serverless Inference via router.huggingface.co/v1.
|
| 817 |
|
| 818 |
+
Requires an HF_TOKEN with 'Inference Providers (Write)' permission.
|
| 819 |
+
HF_TOKEN is auto-injected in HF Spaces but only has repo-read access;
|
| 820 |
+
a PRO account or purchased credits is needed for LLM inference.
|
|
|
|
| 821 |
"""
|
| 822 |
|
| 823 |
def __init__(
|
|
|
|
| 826 |
default_model: str = MODEL_HF_SMOL,
|
| 827 |
max_retries: int = 3,
|
| 828 |
) -> None:
|
| 829 |
+
self.api_key = api_key or os.environ.get("HF_TOKEN", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 830 |
if not self.api_key:
|
| 831 |
logger.warning(
|
| 832 |
+
"HF_TOKEN is not set β HF Inference will not make LLM calls."
|
|
|
|
| 833 |
)
|
| 834 |
self.default_model = default_model
|
| 835 |
self.max_retries = max_retries
|
|
|
|
| 877 |
max_tokens: int = 1024,
|
| 878 |
) -> str:
|
| 879 |
if not self.api_key:
|
| 880 |
+
self._last_error = "HF_TOKEN not set"
|
| 881 |
return ""
|
| 882 |
if self._is_quota_exhausted():
|
| 883 |
logger.debug("HF quota circuit breaker active β skipping complete()")
|
|
|
|
| 928 |
elif status in (401, 402, 403, 410):
|
| 929 |
# Auth/payment failure β circuit-break for 1h to stop spam retries.
|
| 930 |
# 402 means no credits (token lacks Inference Providers permission).
|
|
|
|
| 931 |
self._rate_limited_until = time.monotonic() + 3600
|
| 932 |
self._auth_error = body
|
| 933 |
logger.error(
|
| 934 |
f"HF auth error ({status}): {body[:120]} β "
|
| 935 |
+
"HF_TOKEN needs 'Inference Providers (Write)' permission"
|
| 936 |
)
|
| 937 |
return ""
|
| 938 |
elif status in (503, 504):
|
|
|
|
| 1021 |
provider = PROVIDER_GROQ
|
| 1022 |
elif os.environ.get("GEMINI_API_KEY"):
|
| 1023 |
provider = PROVIDER_GEMINI
|
| 1024 |
+
elif os.environ.get("HF_TOKEN"):
|
|
|
|
| 1025 |
provider = PROVIDER_HF
|
| 1026 |
else:
|
| 1027 |
provider = PROVIDER_OLLAMA
|