Kesheratmex committed
Commit f9d898d · 1 Parent(s): 3ec28bd

Add json import to app.py for JSON handling

Files changed (2)
  1. app.py +1 -0
  2. blade-inspection-demo/gptoss_wrapper.py +65 -0
app.py CHANGED
@@ -1,5 +1,6 @@
 import gradio as gr
 import tempfile
+import json
 import shutil
 import os
 import cv2
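
The call site for the new import is not visible in this diff. As a minimal, hypothetical sketch of the kind of JSON handling the import enables in app.py (the result dict and output path are illustrative, not taken from the actual file):

import json

# Hypothetical example only: persist an inspection result as JSON.
# Neither "result" nor this path appears anywhere in the commit.
result = {"blade_id": "B-001", "defects": [], "status": "ok"}
with open("inspection_result.json", "w") as f:
    json.dump(result, f, indent=2)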
blade-inspection-demo/gptoss_wrapper.py ADDED
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+import os
+import json
+import requests
+from typing import Optional
+
+class GPTOSSWrapper:
+    """
+    Lightweight wrapper to call the GPT-OSS 120 model via the HuggingFace Inference API.
+    Supports CPU-only workflows by using the public inference endpoint and an optional HF token.
+    """
+    def __init__(self, model: str = "gpt-oss-120", token: Optional[str] = None, base_url: Optional[str] = None, timeout: int = 60):
+        self.model = model
+        self.base_url = base_url or f"https://api-inference.huggingface.co/models/{model}"
+        # Fall back to the HF_API_TOKEN environment variable when no token is passed.
+        self.token = token or os.environ.get("HF_API_TOKEN")
+        self.session = requests.Session()
+        self.timeout = timeout
+
+    def _headers(self) -> dict:
+        headers = {"Accept": "application/json"}
+        if self.token:
+            headers["Authorization"] = f"Bearer {self.token}"
+        return headers
+
+    def generate(self, prompt: str, max_new_tokens: int = 1024, temperature: float = 0.3, top_p: float = 0.9) -> str:
+        """
+        Send the prompt to the GPT-OSS 120 model and return the generated text.
+        Tries a couple of payload shapes to maximize compatibility with HF Inference API responses.
+        """
+        payloads = [
+            {"inputs": prompt, "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature, "top_p": top_p}},
+            {"inputs": {"prompt": prompt}, "options": {"wait_for_model": True}, "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}}
+        ]
+        last_err = None
+        for payload in payloads:
+            try:
+                resp = self.session.post(self.base_url, headers=self._headers(), json=payload, timeout=self.timeout)
+                resp.raise_for_status()
+                data = resp.json()
+                # Normalize possible response shapes; the API often returns [{"generated_text": ...}].
+                if isinstance(data, list) and data and isinstance(data[0], dict):
+                    data = data[0]
+                if isinstance(data, dict):
+                    if "generated_text" in data:
+                        return data["generated_text"]
+                    if "choices" in data and isinstance(data["choices"], list) and data["choices"]:
+                        first = data["choices"][0]
+                        if isinstance(first, dict):
+                            if "text" in first:
+                                return first["text"]
+                            if "generated_text" in first:
+                                return first["generated_text"]
+                if isinstance(data, str):
+                    return data
+                return json.dumps(data, indent=2)
+            except Exception as e:
+                last_err = e
+                continue
+        return f"Error calling GPT-OSS: {last_err}"
+
+# Simple convenience function
+def generate_report_text(prompt: str, token: Optional[str] = None, model: str = "gpt-oss-120") -> str:
+    wrapper = GPTOSSWrapper(model=model, token=token)
+    return wrapper.generate(prompt)
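
For reference, a minimal usage sketch of the new wrapper (the prompt strings are illustrative; run from blade-inspection-demo/ or adjust the import path, and supply a token via the HF_API_TOKEN environment variable or the token argument):

from gptoss_wrapper import GPTOSSWrapper, generate_report_text

# One-off call through the convenience function (token read from HF_API_TOKEN if set).
report = generate_report_text("Summarize the blade inspection findings.")
print(report)

# Reuse one wrapper (and its underlying requests.Session) across multiple calls.
wrapper = GPTOSSWrapper()
print(wrapper.generate("List common wind turbine blade defect types.", max_new_tokens=256))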