"""
LLM module. HuggingFace Inference API as primary.
Works natively from HF Spaces — same infrastructure.
OpenRouter and Groq as fallback providers.
WHY HF Inference API?
HF Spaces can always reach HuggingFace's own APIs.
No network routing issues. Uses existing HF_TOKEN.
Same Llama 3.3 70B model as others.
"""
import os
import logging
from dotenv import load_dotenv
from tenacity import retry, stop_after_attempt, wait_exponential
# Pull provider credentials (HF_TOKEN, OPENROUTER_API_KEY, GROQ_API_KEY)
# from a local .env file when present.
load_dotenv()
logger = logging.getLogger(__name__)
# Provider client singletons. Each is populated by its _init_* function
# below and stays None when the API key is missing or the SDK fails to load.
# ── HuggingFace Inference API ─────────────────────────────
_hf_client = None
# ── OpenRouter (free tier, reliable fallback) ──────────────
_openrouter_client = None
# ── Groq fallback (works locally, may be blocked on HF Spaces) ──
_groq_client = None
def _init_hf():
    """Create the HuggingFace Inference API client.

    Reads HF_TOKEN from the environment and stores an InferenceClient
    for Llama-3.3-70B in the module-level ``_hf_client``.

    Returns:
        bool: True when the client is ready, False when the token is
        absent or client construction fails.
    """
    global _hf_client
    token = os.getenv("HF_TOKEN")
    if not token:
        # Without a token there is nothing to initialise — warn and bail.
        logger.warning("HF_TOKEN not set — HF Inference API disabled")
        return False
    try:
        from huggingface_hub import InferenceClient
        _hf_client = InferenceClient(
            token=token,
            model="meta-llama/Llama-3.3-70B-Instruct",
        )
    except Exception as e:
        logger.error(f"HF Inference API init failed: {e}")
        return False
    logger.info("HF Inference API ready (Llama-3.3-70B)")
    return True
def _init_openrouter():
    """Create the OpenRouter fallback client.

    Uses the OpenAI SDK pointed at OpenRouter's API endpoint; the client
    is stored in the module-level ``_openrouter_client``.

    Returns:
        bool: True on success, False when OPENROUTER_API_KEY is missing
        or client construction fails.
    """
    global _openrouter_client
    key = os.getenv("OPENROUTER_API_KEY")
    if not key:
        # No key configured → provider silently unavailable.
        return False
    try:
        from openai import OpenAI
        _openrouter_client = OpenAI(
            api_key=key,
            base_url="https://openrouter.ai/api/v1",
        )
    except Exception as e:
        logger.error(f"OpenRouter init failed: {e}")
        return False
    logger.info("OpenRouter ready as fallback")
    return True
def _init_groq():
    """Create the Groq fallback client in the module-level ``_groq_client``.

    Returns:
        bool: True on success, False when GROQ_API_KEY is missing or
        client construction fails.
    """
    global _groq_client
    key = os.getenv("GROQ_API_KEY")
    if not key:
        # No key configured → provider silently unavailable.
        return False
    try:
        from groq import Groq
        _groq_client = Groq(api_key=key)
    except Exception as e:
        logger.error(f"Groq init failed: {e}")
        return False
    logger.info("Groq ready as fallback")
    return True
# Initialise every provider once at import time. The boolean flags record
# which providers are usable so the fallback chain can skip dead ones.
_hf_ready = _init_hf()
_openrouter_ready = _init_openrouter()
_groq_ready = _init_groq()
def _call_hf(messages: list) -> str:
    """Send *messages* to the HF Inference API and return the reply text.

    Assumes ``_hf_client`` was initialised; raises whatever the client
    raises on failure (handled by the caller's fallback logic).
    """
    completion = _hf_client.chat_completion(
        temperature=0.3,
        max_tokens=1500,
        messages=messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def _call_openrouter(messages: list) -> str:
    """Send *messages* to OpenRouter's free Llama 3.3 70B tier; return the reply text.

    Assumes ``_openrouter_client`` was initialised; exceptions propagate
    to the caller's fallback logic.
    """
    completion = _openrouter_client.chat.completions.create(
        temperature=0.3,
        max_tokens=1500,
        messages=messages,
        model="meta-llama/llama-3.3-70b-instruct:free",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def _call_groq(messages: list) -> str:
    """Send *messages* to Groq's Llama 3.3 70B endpoint; return the reply text.

    Assumes ``_groq_client`` was initialised; exceptions propagate to the
    caller's fallback logic.
    """
    completion = _groq_client.chat.completions.create(
        messages=messages,
        model="llama-3.3-70b-versatile",
        max_tokens=1500,
        temperature=0.3,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def _call_with_fallback(messages: list) -> str:
    """Route a chat request through the provider chain.

    Order: HF Inference API → OpenRouter → Groq. A provider is attempted
    only when its init flag and client are set; on failure the error is
    logged and the next provider is tried.

    Args:
        messages: OpenAI-style chat messages
            (list of {"role": ..., "content": ...} dicts).

    Returns:
        The assistant's reply text from the first provider that succeeds.

    Raises:
        Exception: when every available provider fails (or none is
            configured). The last provider's error is chained as
            ``__cause__`` so the real failure is not lost.
    """
    last_error = None
    if _hf_ready and _hf_client:
        try:
            return _call_hf(messages)
        except Exception as e:
            last_error = e
            logger.warning(f"HF Inference failed: {e}, trying OpenRouter")
    if _openrouter_ready and _openrouter_client:
        try:
            return _call_openrouter(messages)
        except Exception as e:
            last_error = e
            logger.warning(f"OpenRouter failed: {e}, trying Groq")
    if _groq_ready and _groq_client:
        try:
            return _call_groq(messages)
        except Exception as e:
            last_error = e
            logger.error(f"Groq also failed: {e}")
    # BUGFIX: the original raise discarded the underlying provider error;
    # chaining it preserves the real cause for logs and retry diagnostics.
    raise Exception("All LLM providers failed") from last_error
@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=2, max=8))
def call_llm_raw(messages: list) -> str:
    """Run a pre-assembled chat message list through the provider chain.

    Retries up to 3 times with exponential backoff (2–8s) when all
    providers fail. Used by the V2 agent for Pass 1 and Pass 3.
    """
    answer = _call_with_fallback(messages)
    return answer
@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=2, max=8))
def call_llm(query: str, context: str) -> str:
    """Answer *query* using only the retrieved *context* excerpts.

    Builds the NyayaSetu system/user prompt pair and runs it through the
    provider chain, retrying up to 3 times with exponential backoff
    (2–8s). Used by the V1 agent.
    """
    system_message = {
        "role": "system",
        "content": "You are NyayaSetu, an Indian legal research assistant. Answer only from provided excerpts. Cite judgment IDs. End with: NOTE: This is not legal advice."
    }
    user_message = {
        "role": "user",
        "content": f"QUESTION: {query}\n\nSOURCES:\n{context}\n\nAnswer based on sources. Cite judgment IDs."
    }
    return _call_with_fallback([system_message, user_message])