rahulrb99 commited on
Commit
4d81eeb
·
1 Parent(s): 5ccf778

fix: use router.huggingface.co for quiz (api-inference deprecated)

Browse files
Files changed (1) hide show
  1. backend/quiz_service.py +10 -15
backend/quiz_service.py CHANGED
@@ -1,20 +1,15 @@
1
  """
2
  Quiz generation service.
3
- Retrieves chunks from Supabase, sends to HF Inference API, saves artifact.
4
  """
5
 
6
  import os
7
  import json
8
  import re
9
- import requests
10
- from backend.db import supabase
11
- from backend.artifacts_service import create_artifact
12
- from huggingface_hub import InferenceClient
13
-
14
 
15
- HF_TOKEN = os.environ.get("HF_TOKEN", "")
16
- HF_MODEL = "meta-llama/Llama-3.2-3B-Instruct"
17
- HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
18
 
19
  # Retrieval
20
  def _get_chunks_for_notebook(notebook_id: str, limit: int = 15) -> list[str]:
@@ -60,15 +55,15 @@ Format your response as a JSON array only, no preamble:
60
  Context:
61
  {context}"""
62
 
63
- # HF Inference Call
64
  def _call_hf(prompt: str) -> str:
65
- client = InferenceClient(token=HF_TOKEN)
66
- response = client.chat_completion(
67
- model=HF_MODEL,
68
  messages=[{"role": "user", "content": prompt}],
69
- max_tokens=3000, # ← increase this
70
  )
71
- return response.choices[0].message.content
72
 
73
  # Main Function
74
  def generate_quiz(notebook_id: str, source_type: str = "all", source_id: str = None) -> dict:
 
1
  """
2
  Quiz generation service.
3
+ Retrieves chunks from Supabase, sends to LLM via Hugging Face router, saves artifact.
4
  """
5
 
6
  import os
7
  import json
8
  import re
 
 
 
 
 
9
 
10
+ from backend.artifacts_service import create_artifact
11
+ from backend.db import supabase
12
+ from backend.llm_client import DEFAULT_MODEL, get_llm_client
13
 
14
  # Retrieval
15
  def _get_chunks_for_notebook(notebook_id: str, limit: int = 15) -> list[str]:
 
55
  Context:
56
  {context}"""
57
 
58
# LLM Call (uses router.huggingface.co via llm_client)
def _call_hf(prompt: str) -> str:
    """Send *prompt* as a single user message to the default LLM.

    Returns the assistant reply text, or an empty string when the
    completion carries no content.
    """
    chat_messages = [{"role": "user", "content": prompt}]
    completion = get_llm_client().chat.completions.create(
        model=DEFAULT_MODEL,
        messages=chat_messages,
        max_tokens=3000,
    )
    reply = completion.choices[0].message.content
    return reply if reply else ""
67
 
68
  # Main Function
69
  def generate_quiz(notebook_id: str, source_type: str = "all", source_id: str = None) -> dict: