Spaces:

HipFil98
/

PhDScout

Sleeping

HipFil98 Claude Sonnet 4.6 committed on Mar 19

Commit

55df7c0

1 Parent(s): 51f8256

feat: add Groq backend as primary free inference provider

- Add Groq (OpenAI-compatible) backend to LLMClient and config
- Auto-select Groq when GROQ_API_KEY secret is set, fall back to HF
- Update JobAgent to accept backend + api_key instead of HF-only token
- Raise LLMQuotaError (402) explicitly instead of silently scoring 0
- Switch model list to Groq models (llama-3.3-70b, llama-3.1-8b, gemma2, mixtral)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (5) hide show

agent/job_matcher.py +3 -1
agent/llm_client.py +76 -2
agent/pipeline.py +6 -6
app.py +28 -9
config.py +11 -1

agent/job_matcher.py CHANGED Viewed

@@ -4,7 +4,7 @@ from __future__ import annotations
 from typing import Any, TypedDict
-from agent.llm_client import LLMClient
 from agent.utils import parse_json, job_institution, job_description
@@ -101,6 +101,8 @@ class JobMatcher:
         try:
             raw = self.llm.generate(system=_SYSTEM, user=prompt, json_mode=True)
         except RuntimeError as exc:
             return _fallback(str(exc))

 from typing import Any, TypedDict
+from agent.llm_client import LLMClient, LLMQuotaError
 from agent.utils import parse_json, job_institution, job_description
         try:
             raw = self.llm.generate(system=_SYSTEM, user=prompt, json_mode=True)
+        except LLMQuotaError:
+            raise  # propagate — caller should surface this to the user
         except RuntimeError as exc:
             return _fallback(str(exc))

agent/llm_client.py CHANGED Viewed

@@ -19,6 +19,10 @@ from typing import Iterator
 from config import config
 class LLMClient:
     """Unified LLM client supporting Ollama and HuggingFace backends.
@@ -42,6 +46,7 @@ class LLMClient:
         # Lazily initialised clients
         self._openai_client = None
         self._hf_client = None
     # ------------------------------------------------------------------
     # Internal: backend initialisation
@@ -63,6 +68,23 @@ class LLMClient:
             )
         return self._openai_client
     def _get_hf_client(self):
         """Return (and cache) a huggingface_hub.InferenceClient."""
         if self._hf_client is None:
@@ -129,12 +151,14 @@ class LLMClient:
         if self.backend == "ollama":
             return self._generate_ollama(system, user, json_mode=json_mode)
         elif self.backend == "huggingface":
             return self._generate_hf(system, user)
         else:
             raise RuntimeError(
                 f"Unknown LLM backend: '{self.backend}'. "
-                "Set LLM_BACKEND=ollama or LLM_BACKEND=huggingface in your .env."
             )
     def stream_generate(self, system: str, user: str) -> Iterator[str]:
@@ -149,6 +173,8 @@ class LLMClient:
         """
         if self.backend == "ollama":
             yield from self._stream_ollama(system, user)
         elif self.backend == "huggingface":
             yield from self._stream_hf(system, user)
         else:
@@ -212,6 +238,47 @@ class LLMClient:
                 ) from exc
             raise RuntimeError(f"Ollama streaming failed: {exc}") from exc
     # ------------------------------------------------------------------
     # HuggingFace implementation (via InferenceClient)
     # ------------------------------------------------------------------
@@ -232,7 +299,14 @@ class LLMClient:
                 return result.choices[0].message.content or ""
             except Exception as exc:
                 last_exc = exc
-                if "503" in str(exc) or "502" in str(exc) or "529" in str(exc):
                     import time as _time
                     _time.sleep(2 ** attempt)  # 1s, 2s, 4s
                     continue

 from config import config
+class LLMQuotaError(RuntimeError):
+    """Raised when the HuggingFace free-tier quota is exhausted (HTTP 402).
+
+    Subclasses RuntimeError, so a handler for RuntimeError also catches it;
+    code that wants to treat quota exhaustion separately must list
+    ``except LLMQuotaError`` before ``except RuntimeError``.
+    """
 class LLMClient:
     """Unified LLM client supporting Ollama and HuggingFace backends.
         # Lazily initialised clients
         self._openai_client = None
         self._hf_client = None
+        self._groq_client = None
     # ------------------------------------------------------------------
     # Internal: backend initialisation
             )
         return self._openai_client
+    def _get_groq_client(self):
+        """Return (and cache) an openai.OpenAI client pointed at Groq."""
+        if not hasattr(self, "_groq_client") or self._groq_client is None:
+            try:
+                from openai import OpenAI  # type: ignore
+            except ImportError as exc:
+                raise ImportError(
+                    "The 'openai' package is required for the Groq backend.\n"
+                    "Install it with:  pip install openai>=1.0.0"
+                ) from exc
+            api_key = self._token_override or config.groq_api_key
+            self._groq_client = OpenAI(
+                base_url=config.groq_base_url,
+                api_key=api_key,
+            )
+        return self._groq_client
     def _get_hf_client(self):
         """Return (and cache) a huggingface_hub.InferenceClient."""
         if self._hf_client is None:
         if self.backend == "ollama":
             return self._generate_ollama(system, user, json_mode=json_mode)
+        elif self.backend == "groq":
+            return self._generate_groq(system, user, json_mode=json_mode)
         elif self.backend == "huggingface":
             return self._generate_hf(system, user)
         else:
             raise RuntimeError(
                 f"Unknown LLM backend: '{self.backend}'. "
+                "Set LLM_BACKEND=ollama, groq, or huggingface in your .env."
             )
     def stream_generate(self, system: str, user: str) -> Iterator[str]:
         """
         if self.backend == "ollama":
             yield from self._stream_ollama(system, user)
+        elif self.backend == "groq":
+            yield from self._stream_groq(system, user)
         elif self.backend == "huggingface":
             yield from self._stream_hf(system, user)
         else:
                 ) from exc
             raise RuntimeError(f"Ollama streaming failed: {exc}") from exc
+    # ------------------------------------------------------------------
+    # Groq implementation (OpenAI-compatible API)
+    # ------------------------------------------------------------------
+    def _generate_groq(self, system: str, user: str, json_mode: bool = False) -> str:
+        client = self._get_groq_client()
+        kwargs: dict = {
+            "model": self.model,
+            "messages": [
+                {"role": "system", "content": system},
+                {"role": "user", "content": user},
+            ],
+            "max_tokens": config.max_tokens,
+        }
+        if json_mode:
+            kwargs["response_format"] = {"type": "json_object"}
+        try:
+            response = client.chat.completions.create(**kwargs)
+            return response.choices[0].message.content or ""
+        except Exception as exc:
+            raise RuntimeError(f"Groq inference failed: {exc}") from exc
+    def _stream_groq(self, system: str, user: str) -> Iterator[str]:
+        client = self._get_groq_client()
+        try:
+            stream = client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": system},
+                    {"role": "user", "content": user},
+                ],
+                max_tokens=config.max_tokens,
+                stream=True,
+            )
+            for chunk in stream:
+                delta = chunk.choices[0].delta
+                if delta and delta.content:
+                    yield delta.content
+        except Exception as exc:
+            raise RuntimeError(f"Groq streaming failed: {exc}") from exc
     # ------------------------------------------------------------------
     # HuggingFace implementation (via InferenceClient)
     # ------------------------------------------------------------------
                 return result.choices[0].message.content or ""
             except Exception as exc:
                 last_exc = exc
+                err_str = str(exc)
+                if "402" in err_str:
+                    raise LLMQuotaError(
+                        "HuggingFace quota exceeded (402 Payment Required).\n"
+                        "Switch to a different model in the LLM Settings panel, "
+                        "or wait for your free-tier quota to reset."
+                    ) from exc
+                if "503" in err_str or "502" in err_str or "529" in err_str:
                     import time as _time
                     _time.sleep(2 ** attempt)  # 1s, 2s, 4s
                     continue

agent/pipeline.py CHANGED Viewed

@@ -15,16 +15,16 @@ from agent.cover_letter import CoverLetterWriter
 class JobAgent:
     """Orchestrates CV parsing, job search, scoring, and application generation.
-    Each instance holds its own LLM client configured with the given token and
-    model — safe to instantiate per-request (no shared mutable state).
     Args:
-        token:   HuggingFace API token.
-        model:   HuggingFace model ID (e.g. "mistralai/Mistral-7B-Instruct-v0.3").
     """
-    def __init__(self, token: str, model: str) -> None:
-        self.llm = LLMClient(model=model, backend="huggingface", token=token)
         self.parser = CVParser(self.llm)
         self.searcher = JobSearcher()
         self.matcher = JobMatcher(self.llm)

 class JobAgent:
     """Orchestrates CV parsing, job search, scoring, and application generation.
+    Each instance holds its own LLM client — safe to instantiate per-request.
     Args:
+        model:    Model ID for the selected backend.
+        backend:  "groq" | "huggingface" | "ollama"
+        api_key:  API key for the selected backend (not needed for Ollama).
     """
+    def __init__(self, model: str, backend: str = "groq", api_key: str = "") -> None:
+        self.llm = LLMClient(model=model, backend=backend, token=api_key or None)
         self.parser = CVParser(self.llm)
         self.searcher = JobSearcher()
         self.matcher = JobMatcher(self.llm)

app.py CHANGED Viewed

@@ -174,13 +174,13 @@ def run_search(
         return _err("Please upload a CV file first.")
     if not field or not field.strip():
         return _err("Please enter a research field.")
-    if not _SHARED_TOKEN:
-        return _err("No HF_TOKEN configured. Set it as a Space secret.")
     try:
         from agent.pipeline import JobAgent
-        agent = JobAgent(token=_SHARED_TOKEN, model=model_name)
         cv_path = cv_file if isinstance(cv_file, str) else cv_file.name
         progress(0, desc="Parsing CV...")
@@ -246,7 +246,7 @@ def load_position(
         match: dict = job.get("match") or {}
         from agent.pipeline import JobAgent
-        agent = JobAgent(token=_SHARED_TOKEN, model=model_name)
         progress(0.3, desc="Generating tailoring hints...")
         hints, cover_letter = agent.prepare_application(job, profile_text)
@@ -270,7 +270,7 @@ def regenerate_letter(
         return "*No position loaded.*"
     try:
         from agent.pipeline import JobAgent
-        agent = JobAgent(token=_SHARED_TOKEN, model=model_name)
         progress(0.3, desc="Regenerating cover letter...")
         result = agent.regenerate_letter(scored_jobs[current_idx], profile_text)
         progress(1.0)
@@ -372,7 +372,14 @@ def export_zip(approved: list) -> tuple:
 # Gradio Blocks layout
 # ---------------------------------------------------------------------------
-MODELS = [
     "Qwen/Qwen2.5-7B-Instruct",
     "meta-llama/Llama-3.2-3B-Instruct",
     "microsoft/Phi-3.5-mini-instruct",
@@ -401,8 +408,20 @@ LOCATIONS = [
     "South Africa", "Israel",
 ]
-# If a shared token is configured via Space Secrets, users don't need to provide one.
-_SHARED_TOKEN = os.environ.get("HF_TOKEN", "")
 with gr.Blocks(
     theme=gr.themes.Soft(primary_hue="blue", secondary_hue="purple"),
@@ -464,7 +483,7 @@ with gr.Blocks(
                         label="Model",
                         choices=MODELS,
                         value=MODELS[0],
-                        info="All are free via HF Inference API",
                     )
             search_btn = gr.Button("Parse CV & Search Positions", variant="primary", size="lg")
             search_status = gr.Markdown("*Ready. Fill in the form and click Search.*")

         return _err("Please upload a CV file first.")
     if not field or not field.strip():
         return _err("Please enter a research field.")
+    if not _API_KEY and _BACKEND != "ollama":
+        return _err("No API key configured. Set GROQ_API_KEY (or HF_TOKEN) as a Space secret.")
     try:
         from agent.pipeline import JobAgent
+        agent = JobAgent(model=model_name, backend=_BACKEND, api_key=_API_KEY)
         cv_path = cv_file if isinstance(cv_file, str) else cv_file.name
         progress(0, desc="Parsing CV...")
         match: dict = job.get("match") or {}
         from agent.pipeline import JobAgent
+        agent = JobAgent(model=model_name, backend=_BACKEND, api_key=_API_KEY)
         progress(0.3, desc="Generating tailoring hints...")
         hints, cover_letter = agent.prepare_application(job, profile_text)
         return "*No position loaded.*"
     try:
         from agent.pipeline import JobAgent
+        agent = JobAgent(model=model_name, backend=_BACKEND, api_key=_API_KEY)
         progress(0.3, desc="Regenerating cover letter...")
         result = agent.regenerate_letter(scored_jobs[current_idx], profile_text)
         progress(1.0)
 # Gradio Blocks layout
 # ---------------------------------------------------------------------------
+# Model IDs offered in the "Model" dropdown when the Groq backend is selected;
+# the first entry is the dropdown's default value.
+# NOTE(review): providers retire model IDs over time — confirm each ID below
+# is still served by Groq before relying on it.
+GROQ_MODELS = [
+    "llama-3.3-70b-versatile",
+    "llama-3.1-8b-instant",
+    "gemma2-9b-it",
+    "mixtral-8x7b-32768",
+]
+HF_MODELS = [
     "Qwen/Qwen2.5-7B-Instruct",
     "meta-llama/Llama-3.2-3B-Instruct",
     "microsoft/Phi-3.5-mini-instruct",
     "South Africa", "Israel",
 ]
+# Backend selection: Groq takes priority over HuggingFace
+# Resolved once at import time from environment variables; a missing variable
+# reads as "" and therefore counts as "not configured".
+_GROQ_KEY = os.environ.get("GROQ_API_KEY", "")
+_HF_TOKEN = os.environ.get("HF_TOKEN", "")
+if _GROQ_KEY:
+    _BACKEND = "groq"
+    _API_KEY = _GROQ_KEY
+    MODELS = GROQ_MODELS
+    _MODEL_INFO = "Free via Groq — no user limits"
+else:
+    # Fallback when no Groq key is present. _API_KEY may still be "" here if
+    # HF_TOKEN is also unset.
+    _BACKEND = "huggingface"
+    _API_KEY = _HF_TOKEN
+    MODELS = HF_MODELS
+    _MODEL_INFO = "Free via HuggingFace Inference API"
 with gr.Blocks(
     theme=gr.themes.Soft(primary_hue="blue", secondary_hue="purple"),
                         label="Model",
                         choices=MODELS,
                         value=MODELS[0],
+                        info=_MODEL_INFO,
                     )
             search_btn = gr.Button("Parse CV & Search Positions", variant="primary", size="lg")
             search_status = gr.Markdown("*Ready. Fill in the form and click Search.*")

config.py CHANGED Viewed

@@ -47,6 +47,10 @@ class AppConfig:
         default_factory=lambda: os.getenv("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
     )
     # Generation settings
     max_tokens: int = 4096
@@ -95,10 +99,16 @@ class AppConfig:
                     "Free HuggingFace inference may be rate-limited or unavailable. "
                     "Get a free key at https://huggingface.co/settings/tokens"
                 )
         else:
             print(
                 f"[WARNING] Unknown LLM_BACKEND '{self.llm_backend}'. "
-                "Supported values: 'ollama', 'huggingface'."
             )

         default_factory=lambda: os.getenv("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
     )
+    # Groq settings (recommended free cloud backend)
+    groq_api_key: str = field(default_factory=lambda: os.getenv("GROQ_API_KEY", ""))
+    groq_base_url: str = "https://api.groq.com/openai/v1"
     # Generation settings
     max_tokens: int = 4096
                     "Free HuggingFace inference may be rate-limited or unavailable. "
                     "Get a free key at https://huggingface.co/settings/tokens"
                 )
+        elif self.llm_backend == "groq":
+            if not self.groq_api_key:
+                print(
+                    "[WARNING] GROQ_API_KEY is not set. "
+                    "Get a free key at https://console.groq.com/keys"
+                )
         else:
             print(
                 f"[WARNING] Unknown LLM_BACKEND '{self.llm_backend}'. "
+                "Supported values: 'ollama', 'huggingface', 'groq'."
             )