File size: 1,649 Bytes
a86c572
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import requests
import os
import time

# Hugging Face API token read from the environment. If unset this is None,
# which produces a literal "Bearer None" header below — NOTE(review): confirm
# the deployment environment always sets HF_API_KEY.
HF_API_KEY = os.getenv("HF_API_KEY")

# HF Inference router endpoint for the google/flan-t5-base model.
API_URL = (
    "https://router.huggingface.co/hf-inference/models/google/flan-t5-base"
)

# Shared HTTP headers for every inference request.
headers = {
    "Authorization": f"Bearer {HF_API_KEY}",
    "Content-Type": "application/json"
}


def build_prompt(question, chunks):
    """Assemble the strict question-answering prompt for the LLM.

    Joins the text of each retrieved chunk into a single context section
    and embeds it, together with the user question, in a template that
    forbids answering from outside the provided context.

    chunks: iterable of indexable records; the 4th field (index 3) is
    assumed to hold the chunk text — TODO confirm against the retriever.
    """
    joined_context = "\n".join(chunk[3] for chunk in chunks)

    template = """
You are a strict question answering system.

Answer ONLY using the context below.
If the answer is not present, say:
"I don't know based on the provided context."

Context:
{ctx}

Question:
{q}

Answer:
"""
    return template.format(ctx=joined_context, q=question)


def call_llm(prompt, max_retries=5, wait_seconds=6):
    """Send *prompt* to the HF inference endpoint and return the generated text.

    Retries up to ``max_retries`` times, sleeping ``wait_seconds`` between
    attempts, on network errors, empty or non-JSON bodies, "model loading"
    responses, and unexpected payload shapes. Returns the refusal sentence
    (matching the one the prompt instructs the model to emit) when no
    answer could be obtained or the API reports a hard error.
    """
    # Keep this byte-identical to the refusal sentence in build_prompt's
    # template so downstream "don't know" checks match either source.
    fallback = "I don't know based on the provided context."

    for _ in range(max_retries):
        try:
            response = requests.post(
                API_URL,
                headers=headers,
                json={"inputs": prompt},
                timeout=30
            )

            # Empty body: endpoint not ready yet; wait and retry.
            if not response.text:
                time.sleep(wait_seconds)
                continue

            try:
                data = response.json()
            except ValueError:
                # Non-JSON body (e.g. an HTML error page); wait and retry.
                time.sleep(wait_seconds)
                continue

            if isinstance(data, dict) and "error" in data:
                if "loading" in data["error"].lower():
                    # Model still warming up on the HF side; give it time.
                    time.sleep(wait_seconds)
                    continue
                # Hard API error: retrying the same payload won't help.
                return fallback

            if isinstance(data, list) and len(data) > 0:
                return data[0].get("generated_text", "").strip()

            # Unexpected payload shape: back off before the next attempt
            # instead of spinning through the remaining retries instantly.
            time.sleep(wait_seconds)

        except requests.exceptions.RequestException:
            # Timeout / connection failure; wait and retry.
            time.sleep(wait_seconds)

    return fallback