# NOTE: Hugging Face Spaces page chrome (status lines, file size, commit
# hash, line-number gutter) was captured with this file and has been removed.
import requests
import os
import time
# Hugging Face Inference API token, read from the environment; None if unset
# (requests would then be sent with "Bearer None" and rejected upstream).
HF_API_KEY = os.getenv("HF_API_KEY")
# Hosted inference endpoint for the google/flan-t5-base model.
API_URL = (
"https://router.huggingface.co/hf-inference/models/google/flan-t5-base"
)
# Shared headers for every POST to the inference endpoint.
headers = {
"Authorization": f"Bearer {HF_API_KEY}",
"Content-Type": "application/json"
}
def build_prompt(question, chunks):
    """Assemble the strict, context-only QA prompt for the LLM.

    Parameters
    ----------
    question : str
        The user's question, inserted verbatim into the prompt.
    chunks : iterable
        Retrieved records; element 3 of each record is assumed to be the
        passage text (TODO confirm against the retriever's row layout).

    Returns
    -------
    str
        The full prompt, instructing the model to answer only from the
        joined passages or emit the fixed "I don't know" fallback.
    """
    passages = "\n".join(chunk[3] for chunk in chunks)
    return f"""
You are a strict question answering system.
Answer ONLY using the context below.
If the answer is not present, say:
"I don't know based on the provided context."
Context:
{passages}
Question:
{question}
Answer:
"""
def call_llm(prompt, max_retries=5, wait_seconds=6):
    """POST *prompt* to the hosted flan-t5 endpoint, retrying transient failures.

    Retries (up to ``max_retries`` attempts, sleeping ``wait_seconds``
    between them) on: network errors, empty response bodies, non-JSON
    bodies, and the API's "model is loading" error. Any other API error,
    or exhausting all retries, yields the same fallback string the prompt
    instructs the model to produce.

    Parameters
    ----------
    prompt : str
        Full prompt text, typically from ``build_prompt``.
    max_retries : int
        Maximum number of request attempts.
    wait_seconds : int | float
        Delay between attempts.

    Returns
    -------
    str
        The model's stripped ``generated_text``, or the fallback answer.
    """
    # Matches the exact fallback wording the prompt asks the model to emit
    # (including the trailing period — the original return value lacked it).
    fallback = "I don't know based on the provided context."
    for attempt in range(max_retries):
        # Keep the try body minimal: only the HTTP call raises
        # RequestException; the parsing below cannot.
        try:
            response = requests.post(
                API_URL,
                headers=headers,
                json={"inputs": prompt},
                timeout=30,
            )
        except requests.exceptions.RequestException:
            # Transient network failure — back off, but don't sleep after
            # the final attempt (the original wasted one full wait here).
            if attempt < max_retries - 1:
                time.sleep(wait_seconds)
            continue
        if not response.text:
            # Empty body sometimes appears while the model spins up.
            if attempt < max_retries - 1:
                time.sleep(wait_seconds)
            continue
        try:
            data = response.json()
        except ValueError:
            # Non-JSON body (e.g. an HTML error page) — retry.
            if attempt < max_retries - 1:
                time.sleep(wait_seconds)
            continue
        if isinstance(data, dict) and "error" in data:
            if "loading" in data["error"].lower():
                # Cold model: wait for it to load, then retry.
                if attempt < max_retries - 1:
                    time.sleep(wait_seconds)
                continue
            # Hard API error (auth, quota, bad input) — no point retrying.
            return fallback
        if isinstance(data, list) and data:
            # Expected success shape: [{"generated_text": "..."}].
            return data[0].get("generated_text", "").strip()
    return fallback