File size: 1,219 Bytes
33ecdc5
d655b62
2ec91ed
d655b62
bb9adf1
d655b62
 
 
 
 
 
2ec91ed
 
d655b62
dcb6f6f
d655b62
 
dcb6f6f
 
 
 
 
 
d655b62
 
 
 
 
 
 
dcb6f6f
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
import requests

# Replace this with your actual endpoint URL
API_URL = "https://lgj704z9p0j2vf79.us-east4.gcp.endpoints.huggingface.cloud"
# Bearer token read from the environment; this is None when the variable is
# unset, which makes the Authorization header below literally "Bearer None"
# and the endpoint will reject the request with 401.
# NOTE(review): consider failing fast here when the token is missing.
HF_ENDPOINT_TOKEN = os.environ.get("HF_ENDPOINT_TOKEN")

# Shared request headers for every call to the endpoint.
headers = {
    "Authorization": f"Bearer {HF_ENDPOINT_TOKEN}",
    "Content-Type": "application/json"
}

def call_model(prompt: str) -> str:
    """Send *prompt* to the HF inference endpoint and return the generated text.

    Args:
        prompt: Input text sent as the ``inputs`` field of the request body.

    Returns:
        The generated text, or a warning string when the response carries no
        recognizable output field.

    Raises:
        RuntimeError: If the endpoint responds with a non-200 status code.
        requests.Timeout: If the endpoint does not answer within the timeout.
    """
    response = requests.post(
        f"{API_URL}/generate",  # <-- use /generate for HF endpoints
        headers=headers,
        json={
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 2048,
                # NOTE: temperature is ignored while do_sample is False
                # (greedy decoding); kept so sampling can be toggled on easily.
                "temperature": 0.3,
                "do_sample": False,
            },
        },
        # Without a timeout a stalled endpoint would hang this call forever.
        timeout=120,
    )

    if response.status_code != 200:
        raise RuntimeError(f"Inference error: {response.status_code} - {response.text}")

    return _extract_generated_text(response.json())


def _extract_generated_text(result) -> str:
    """Normalize the endpoint's JSON payload to a plain string.

    Handles the response shapes HF endpoints emit: a dict with
    ``generated_text``, a non-empty list of such dicts, or a dict with
    ``text``. Anything else yields a warning string instead of raising
    (the original code raised IndexError on an empty list).
    """
    if isinstance(result, dict):
        if "generated_text" in result:
            return result["generated_text"]
        if "text" in result:
            return result["text"]
    elif isinstance(result, list) and result:
        first = result[0]
        if isinstance(first, dict) and "generated_text" in first:
            return first["generated_text"]
    return "⚠️ No output generated."