"""Small client for a hosted Mistral-7B-Instruct text-generation endpoint."""

import os
import time

import requests

API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
HF_TOKEN = os.getenv("HF_TOKEN")

# Only send the Authorization header when a token is configured; formatting an
# unset token would otherwise produce the literal header value "Bearer None".
headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}


def query_model(prompt, *, max_retries=10, timeout=60):
    """Send *prompt* to the model endpoint and return the generated text.

    The endpoint is POSTed a JSON payload with the prompt and fixed
    generation parameters (``max_new_tokens=800``, ``temperature=0.7``).

    Response handling:

    * A JSON list whose first element is an object with a
      ``"generated_text"`` key: that text is returned.
    * A JSON object with an ``"estimated_time"`` key: the call sleeps for
      that many seconds and retries, at most ``max_retries`` times.
      (Presumably the service reports this while the model is loading;
      see the Inference API documentation.)
    * Anything else, including a non-JSON body or an exhausted retry budget:
      a string starting with ``"Error: "`` is returned instead of raising.

    Args:
        prompt: Text sent as the ``"inputs"`` field of the request.
        max_retries: Maximum number of additional attempts after a response
            that asks the caller to wait. Values below zero are treated as
            zero (a single attempt).
        timeout: Timeout in seconds passed to ``requests.post`` so a stalled
            connection cannot block forever.

    Returns:
        The generated text on success, otherwise an ``"Error: ..."`` string.

    Raises:
        requests.RequestException: Network-level failures (connection errors,
            timeouts) propagate to the caller unchanged.
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 800,
            "temperature": 0.7,
        },
    }

    retries_left = max(0, max_retries)
    while True:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )

        try:
            result = response.json()
        except ValueError:
            # Body was not JSON (e.g. an HTML error page); report it using the
            # same "Error: ..." convention as other failures.
            return f"Error: HTTP {response.status_code}: {response.text[:200]}"

        if isinstance(result, list):
            first = result[0] if result else None
            if isinstance(first, dict) and "generated_text" in first:
                return first["generated_text"]
            # Empty list or unexpected element shape.
            return f"Error: {result}"

        if not isinstance(result, dict) or "estimated_time" not in result:
            return f"Error: {result}"

        try:
            # Clamp to zero: time.sleep() rejects negative durations.
            delay = max(0.0, float(result["estimated_time"]))
        except (TypeError, ValueError):
            return f"Error: {result}"

        if retries_left == 0:
            # Retry budget exhausted; surface the last response to the caller.
            return f"Error: {result}"
        retries_left -= 1
        time.sleep(delay)