# blade-inspection-demo / gptoss_wrapper.py
# Author: Kesheratmex — "Improve GPT wrapper loading and HF token handling" (commit a4acfba)
"""
GPTOSSWrapper - Simple integration wrapper for OpenAI or Hugging Face Inference API.
Usage:
from gptoss_wrapper import GPTOSSWrapper
w = GPTOSSWrapper(model="gpt-oss-120")
text = w.generate(prompt)
Behavior:
- Provider selection (priority):
1) If OPENAI_API_KEY is set -> use OpenAI Chat Completions (v1/chat/completions)
2) Else if HUGGINGFACE_API_TOKEN or HF_API_TOKEN is set -> use Hugging Face Inference API
3) Else -> generate() will raise a RuntimeError describing missing credentials.
Note for Spaces:
- Add the secret in your Space settings (Settings → Secrets & variables → Add secret):
- For OpenAI: key name = OPENAI_API_KEY, value = <your_openai_api_key>
- For Hugging Face: key name = HUGGINGFACE_API_TOKEN (or HF_API_TOKEN), value = <your_hf_token>
This file intentionally makes plain HTTP calls via the `requests` library to avoid depending on extra provider SDKs.
"""
import os
import time
import requests
from typing import Optional
class GPTOSSWrapper:
    """
    Lightweight wrapper that can call either OpenAI or Hugging Face inference endpoints.

    Constructor:
        GPTOSSWrapper(model="gpt-oss-120", provider="auto", request_timeout=30)
        - model: model name to request (for OpenAI it must be an available model for your account;
          for Hugging Face it should be a model id hosted on HF).
        - provider: "auto" (default) | "openai" | "hf"
        - request_timeout: per-request timeout in seconds passed to requests.post.
    """

    def __init__(
        self,
        model: str = "gpt-oss-120",
        provider: str = "auto",
        request_timeout: int = 30,
    ):
        self.model = model
        self.request_timeout = request_timeout
        self.openai_key = os.getenv("OPENAI_API_KEY")
        # Accept multiple HF token environment variable names for compatibility:
        # HUGGINGFACE_API_TOKEN, HF_API_TOKEN, or HF_TOKEN (used by some HF examples)
        self.hf_token = (
            os.getenv("HUGGINGFACE_API_TOKEN")
            or os.getenv("HF_API_TOKEN")
            or os.getenv("HF_TOKEN")
        )
        self.provider = provider.lower() if provider else "auto"
        if self.provider == "auto":
            # Provider priority: OpenAI first, then Hugging Face, else "none"
            # (generate() raises a descriptive error for "none").
            if self.openai_key:
                self.provider = "openai"
            elif self.hf_token:
                self.provider = "hf"
            else:
                self.provider = "none"

    def generate(self, prompt: str, max_tokens: int = 512, temperature: float = 0.2) -> str:
        """
        Generate a textual response for the given prompt.

        Parameters:
            prompt: the user prompt to send to the model.
            max_tokens: maximum tokens to generate (mapped to the provider's parameter name).
            temperature: sampling temperature.

        Returns:
            A string with the generated text.

        Raises:
            RuntimeError if no credentials are found or the remote call fails.
        """
        if self.provider == "openai":
            return self._generate_openai(prompt, max_tokens=max_tokens, temperature=temperature)
        if self.provider == "hf":
            return self._generate_hf(prompt, max_tokens=max_tokens, temperature=temperature)
        raise RuntimeError(
            "No API key configured for GPT wrapper. Set OPENAI_API_KEY or HUGGINGFACE_API_TOKEN in the environment."
        )

    def _generate_openai(self, prompt: str, max_tokens: int, temperature: float) -> str:
        """Call the OpenAI Chat Completions endpoint and return the assistant's text."""
        if not self.openai_key:
            raise RuntimeError("OPENAI_API_KEY not set in environment.")
        url = "https://api.openai.com/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.openai_key}",
            "Content-Type": "application/json",
        }
        # Build a simple chat conversation with a single system + user message
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": "You are an expert inspection assistant for wind turbine blade images/videos."},
                {"role": "user", "content": prompt},
            ],
            "max_tokens": max_tokens,
            "temperature": float(temperature),
            "n": 1,
        }
        try:
            # Only the network call and JSON decoding belong in the try block;
            # response-shape validation below raises its own un-wrapped errors.
            r = requests.post(url, headers=headers, json=payload, timeout=self.request_timeout)
            r.raise_for_status()
            data = r.json()
        except (requests.RequestException, ValueError) as e:
            # Surface a clear error for the calling code to handle (the app
            # catches exceptions); chain so the original traceback is kept.
            raise RuntimeError(f"OpenAI API call failed: {e}") from e
        # OpenAI API returns a list of choices
        choices = data.get("choices", [])
        if not choices:
            raise RuntimeError(f"OpenAI returned empty choices: {data}")
        # Extract the assistant message
        msg = choices[0].get("message", {}).get("content")
        if msg is None:
            # Some deployments return text in 'text' or in other fields; fallback to stringifying response
            return str(data)
        return msg.strip()

    def _generate_hf(self, prompt: str, max_tokens: int, temperature: float) -> str:
        """Call the Hugging Face Inference API for self.model and return the generated text."""
        if not self.hf_token:
            raise RuntimeError("HUGGINGFACE_API_TOKEN (or HF_API_TOKEN) not set in environment.")
        url = f"https://api-inference.huggingface.co/models/{self.model}"
        headers = {"Authorization": f"Bearer {self.hf_token}"}
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": max_tokens,
                "temperature": float(temperature),
                # Keep other params minimal; users can customize the model server side
            },
            "options": {"wait_for_model": True},
        }
        try:
            # Keep the try block limited to transport/parse failures so that
            # the HF "error" payload below is not double-wrapped.
            r = requests.post(url, headers=headers, json=payload, timeout=self.request_timeout)
            r.raise_for_status()
            data = r.json()
        except (requests.RequestException, ValueError) as e:
            raise RuntimeError(f"Hugging Face API call failed: {e}") from e
        # Hugging Face inference may return a list of generated outputs or a dict
        if isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict) and "generated_text" in data[0]:
            return data[0]["generated_text"].strip()
        if isinstance(data, dict) and "generated_text" in data:
            return data["generated_text"].strip()
        if isinstance(data, dict) and "error" in data:
            raise RuntimeError(f"Hugging Face error: {data['error']}")
        # Some text-generation endpoints return a plain string or different struct; try to stringify
        return str(data)
# Backwards-compatible factory in case caller expects a function or attribute
def GPTOSSWrapperFactory(model: Optional[str] = None, provider: Optional[str] = None):
    """Return a GPTOSSWrapper, substituting defaults for missing/falsy arguments."""
    chosen_model = model or "gpt-oss-120"
    chosen_provider = provider or "auto"
    return GPTOSSWrapper(model=chosen_model, provider=chosen_provider)