Spaces:
Sleeping
Sleeping
| import os | |
| import requests | |
# Base URL of the Hugging Face Inference Endpoint.
# Replace this with your actual endpoint URL.
API_URL = "https://lgj704z9p0j2vf79.us-east4.gcp.endpoints.huggingface.cloud"

# Access token read from the environment; None when the variable is unset.
HF_ENDPOINT_TOKEN = os.environ.get("HF_ENDPOINT_TOKEN")

# HTTP headers sent with every inference request.
headers = {"Content-Type": "application/json"}
if HF_ENDPOINT_TOKEN:
    # Only attach credentials when a token is actually configured; formatting
    # a missing token would otherwise send the literal string "Bearer None".
    headers["Authorization"] = f"Bearer {HF_ENDPOINT_TOKEN}"
def call_model(prompt: str, *, timeout: float = 120.0) -> str:
    """Send *prompt* to the inference endpoint and return the generated text.

    The prompt is POSTed as JSON to ``{API_URL}/generate`` using the
    module-level ``headers``.

    Args:
        prompt: Text placed in the ``inputs`` field of the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The value stored under ``generated_text`` (or ``text``) in the JSON
        response, or a fixed placeholder message when neither key is found.

    Raises:
        RuntimeError: If the endpoint answers with a status other than 200.
        requests.exceptions.RequestException: On connection failures or when
            *timeout* elapses.
    """
    response = requests.post(
        f"{API_URL}/generate",  # <-- use /generate for HF endpoints
        headers=headers,
        json={
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 2048,
                # NOTE(review): temperature is sent together with
                # do_sample=False; whether it has any effect depends on the
                # serving backend -- confirm this combination is intended.
                "temperature": 0.3,
                "do_sample": False,
            },
        },
        timeout=timeout,
    )
    if response.status_code != 200:
        raise RuntimeError(f"Inference error: {response.status_code} - {response.text}")

    result = response.json()

    # Handle variations in response format. Each shape is type-checked before
    # it is indexed so an empty list, a list of non-dicts, or a bare scalar
    # falls through to the placeholder instead of raising.
    if isinstance(result, dict):
        for key in ("generated_text", "text"):
            if key in result:
                return result[key]
    elif isinstance(result, list) and result:
        first = result[0]
        if isinstance(first, dict) and "generated_text" in first:
            return first["generated_text"]

    return "⚠️ No output generated."