CaffeinatedCoding committed on
Commit
330e02a
·
verified ·
1 Parent(s): 2844ebb

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -0
  2. src/llm.py +24 -21
requirements.txt CHANGED
@@ -6,6 +6,7 @@ fastapi
6
  uvicorn
7
  python-dotenv
8
  groq
 
9
  dvc
10
  mlflow
11
  optuna
 
6
  uvicorn
7
  python-dotenv
8
  groq
9
+ openai
10
  dvc
11
  mlflow
12
  optuna
src/llm.py CHANGED
@@ -1,12 +1,12 @@
1
  """
2
  LLM module. HuggingFace Inference API as primary.
3
  Works natively from HF Spaces — same infrastructure.
4
- Groq as local dev fallback.
5
 
6
  WHY HF Inference API?
7
  HF Spaces can always reach HuggingFace's own APIs.
8
  No network routing issues. Uses existing HF_TOKEN.
9
- Same Llama 3.3 70B model as Groq.
10
  """
11
 
12
  import os
@@ -23,6 +23,10 @@ _hf_client = None
23
  # ── OpenRouter (free tier, reliable fallback) ──────────────
24
  _openrouter_client = None
25
 
 
 
 
 
26
  def _init_hf():
27
  global _hf_client
28
  token = os.getenv("HF_TOKEN")
@@ -41,6 +45,7 @@ def _init_hf():
41
  logger.error(f"HF Inference API init failed: {e}")
42
  return False
43
 
 
44
  def _init_openrouter():
45
  global _openrouter_client
46
  api_key = os.getenv("OPENROUTER_API_KEY")
@@ -58,9 +63,6 @@ def _init_openrouter():
58
  logger.error(f"OpenRouter init failed: {e}")
59
  return False
60
 
61
- # ── Groq fallback (works locally, may be blocked on HF Spaces) ──
62
- _openrouter_ready = _init_openrouter()
63
- _groq_client = None
64
 
65
  def _init_groq():
66
  global _groq_client
@@ -76,13 +78,14 @@ def _init_groq():
76
  logger.error(f"Groq init failed: {e}")
77
  return False
78
 
 
79
  _hf_ready = _init_hf()
 
80
  _groq_ready = _init_groq()
81
 
82
 
83
  def _call_hf(messages: list) -> str:
84
  """Call HuggingFace Inference API."""
85
- # Convert to HF format
86
  response = _hf_client.chat_completion(
87
  messages=messages,
88
  max_tokens=1500,
@@ -90,7 +93,8 @@ def _call_hf(messages: list) -> str:
90
  )
91
  return response.choices[0].message.content
92
 
93
- openrouter(messages: list) -> str:
 
94
  """Call OpenRouter free tier."""
95
  response = _openrouter_client.chat.completions.create(
96
  model="meta-llama/llama-3.3-70b-instruct:free",
@@ -101,9 +105,19 @@ openrouter(messages: list) -> str:
101
  return response.choices[0].message.content
102
 
103
 
104
- def _call_
105
  def _call_groq(messages: list) -> str:
106
- """Call Groq as fthen OpenRouter, then Groq."""
 
 
 
 
 
 
 
 
 
 
 
107
  if _hf_ready and _hf_client:
108
  try:
109
  return _call_hf(messages)
@@ -114,18 +128,7 @@ def _call_groq(messages: list) -> str:
114
  try:
115
  return _call_openrouter(messages)
116
  except Exception as e:
117
- logger.warning(f"OpenRouter
118
- )
119
- return response.choices[0].message.content
120
-
121
-
122
- def _call_with_fallback(messages: list) -> str:
123
- """Try HF first, fall back to Groq."""
124
- if _hf_ready and _hf_client:
125
- try:
126
- return _call_hf(messages)
127
- except Exception as e:
128
- logger.warning(f"HF Inference failed: {e}, trying Groq")
129
 
130
  if _groq_ready and _groq_client:
131
  try:
 
1
  """
2
  LLM module. HuggingFace Inference API as primary.
3
  Works natively from HF Spaces — same infrastructure.
4
+ OpenRouter and Groq as fallback providers.
5
 
6
  WHY HF Inference API?
7
  HF Spaces can always reach HuggingFace's own APIs.
8
  No network routing issues. Uses existing HF_TOKEN.
9
+ Same Llama 3.3 70B model as others.
10
  """
11
 
12
  import os
 
23
  # ── OpenRouter (free tier, reliable fallback) ──────────────
24
  _openrouter_client = None
25
 
26
+ # ── Groq fallback (works locally, may be blocked on HF Spaces) ──
27
+ _groq_client = None
28
+
29
+
30
  def _init_hf():
31
  global _hf_client
32
  token = os.getenv("HF_TOKEN")
 
45
  logger.error(f"HF Inference API init failed: {e}")
46
  return False
47
 
48
+
49
  def _init_openrouter():
50
  global _openrouter_client
51
  api_key = os.getenv("OPENROUTER_API_KEY")
 
63
  logger.error(f"OpenRouter init failed: {e}")
64
  return False
65
 
 
 
 
66
 
67
  def _init_groq():
68
  global _groq_client
 
78
  logger.error(f"Groq init failed: {e}")
79
  return False
80
 
81
+
82
  _hf_ready = _init_hf()
83
+ _openrouter_ready = _init_openrouter()
84
  _groq_ready = _init_groq()
85
 
86
 
87
  def _call_hf(messages: list) -> str:
88
  """Call HuggingFace Inference API."""
 
89
  response = _hf_client.chat_completion(
90
  messages=messages,
91
  max_tokens=1500,
 
93
  )
94
  return response.choices[0].message.content
95
 
96
+
97
+ def _call_openrouter(messages: list) -> str:
98
  """Call OpenRouter free tier."""
99
  response = _openrouter_client.chat.completions.create(
100
  model="meta-llama/llama-3.3-70b-instruct:free",
 
105
  return response.choices[0].message.content
106
 
107
 
 
108
  def _call_groq(messages: list) -> str:
109
+ """Call Groq as fallback."""
110
+ response = _groq_client.chat.completions.create(
111
+ model="llama-3.3-70b-versatile",
112
+ messages=messages,
113
+ temperature=0.3,
114
+ max_tokens=1500
115
+ )
116
+ return response.choices[0].message.content
117
+
118
+
119
+ def _call_with_fallback(messages: list) -> str:
120
+ """Try HF first, then OpenRouter, then Groq."""
121
  if _hf_ready and _hf_client:
122
  try:
123
  return _call_hf(messages)
 
128
  try:
129
  return _call_openrouter(messages)
130
  except Exception as e:
131
+ logger.warning(f"OpenRouter failed: {e}, trying Groq")
 
 
 
 
 
 
 
 
 
 
 
132
 
133
  if _groq_ready and _groq_client:
134
  try: