# app.py
# Standard-library and third-party imports.
import os
import json
import requests
from typing import List, Optional
import gradio as gr

# Optional: huggingface_hub.InferenceApi if installed
try:
    from huggingface_hub import InferenceApi
    HF_HUB_AVAILABLE = True  # hub wrapper can be used for remote inference
except Exception:
    HF_HUB_AVAILABLE = False

# Optional local generation support
try:
    from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
    TRANSFORMERS_AVAILABLE = True  # local text-generation pipelines can be built
except Exception:
    TRANSFORMERS_AVAILABLE = False
# ---------------------
# Config / Model list
# ---------------------
# Default model id; overridable via the HUGGINGFACE_MODEL env var.
DEFAULT_MODEL = os.getenv("HUGGINGFACE_MODEL", "gpt2")
# A curated list of public models for quick selection (small->medium->instruction-tuned)
COMMON_MODELS = [
    "gpt2",
    "distilgpt2",
    "google/flan-t5-small",
    "google/flan-t5-base",
    "google/flan-t5-large",
    "google/flan-t5-xl",
    "facebook/opt-1.3b",
    "facebook/opt-2.7b",
    "bigscience/bloom-560m",
    "bigscience/bloomz-560m",
    "tiiuae/falcon-7b-instruct",  # may be gated
    "mistralai/Mixtral-8x7B-Instruct-v0.1",  # example gated/large
    "stabilityai/stablelm-tuned-alpha-3b",
    "EleutherAI/gpt-neo-2.7B",
    "google/t5-v1_1-base",
    "hf-internal-testing/tiny-random-gpt2"
]
# ---------------------
# Helpers
# ---------------------
def normalize_hf_output(data) -> str:
    """Flatten a Hugging Face inference response (str, list, or dict) into plain text.

    Recognizes the common text-bearing keys ("generated_text", "text",
    "content"); anything unrecognized falls back to a string/JSON rendering.
    """
    if data is None:
        return ""
    if isinstance(data, str):
        return data.strip()
    if isinstance(data, list) and data:
        head = data[0]
        if isinstance(head, dict):
            for field in ("generated_text", "text", "content"):
                value = head.get(field)
                if isinstance(value, str):
                    return value.strip()
            # No known text field: stringify and concatenate the values.
            return " ".join(str(v) for v in head.values()).strip()
        if all(isinstance(item, str) for item in data):
            return "\n".join(data).strip()
        return str(data)
    if isinstance(data, dict):
        for field in ("generated_text", "text", "content"):
            value = data.get(field)
            if isinstance(value, str):
                return value.strip()
        return json.dumps(data)
    return str(data)
def get_api_token(input_token: Optional[str]) -> Optional[str]:
    """Resolve the HF API token: the UI-provided value wins, then env vars, else None."""
    candidate = (input_token or "").strip()
    if candidate:
        return candidate
    # Either env var name is accepted; the first non-empty one wins.
    return os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
# ---------------------
# Inference callers
# ---------------------
def call_hf_router(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    """
    Call HF router endpoint which is more future-proof for some hosted models.
    Returns a plain-text response or a helpful error message.

    The prompt is posted as `inputs` with sampling options under `parameters`,
    matching the hosted text-generation API contract.
    """
    # FIX: the hf-inference provider on the router requires a "models/" path
    # segment before the repo id; without it every request 404s.
    url = f"https://router.huggingface.co/hf-inference/models/{model}"
    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}
    }
    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=60)
    except Exception as e:
        # Network-level failure (DNS, timeout, connection refused, ...).
        return f"[Request error: {e}]"
    # Translate the common HTTP failure modes into actionable messages.
    if resp.status_code == 410:
        return ("[Error 410: endpoint/gone. This model may not have a hosted inference endpoint or requires gated access. "
                "Try another model or check the model page for access requirements.]")
    if resp.status_code == 404:
        return "[Error 404: model not found. Check the model id or try a different model.]"
    if resp.status_code == 401:
        return "[Error 401: unauthorized. Your API key may be missing or lacking permissions.]"
    if resp.status_code != 200:
        # include limited info
        try:
            info = resp.json()
        except Exception:
            info = resp.text
        return f"[HF error {resp.status_code}: {info}]"
    try:
        data = resp.json()
    except Exception:
        # Non-JSON 200 response: return the raw body.
        return resp.text
    return normalize_hf_output(data)
def call_hf_inferenceapi(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    """Use huggingface_hub.InferenceApi when available (wraps different behaviour)."""
    if not HF_HUB_AVAILABLE:
        # Wrapper not installed: go straight to the raw router endpoint.
        return call_hf_router(prompt, model, token, max_new_tokens, temperature)
    try:
        client = InferenceApi(repo_id=model, token=token)
        raw = client(prompt, params={"max_new_tokens": max_new_tokens, "temperature": temperature})
        return normalize_hf_output(raw)
    except Exception:
        # Any wrapper failure falls back to the router endpoint.
        return call_hf_router(prompt, model, token, max_new_tokens, temperature)
# Local generation fallback
# Cached pipeline plus the model name it was built for, so repeated calls
# with the same model do not reload weights on every message.
_local_gen = None
_local_gen_model = None

def init_local_gen(model_name: str):
    """Return (and memoize) a text-generation pipeline for `model_name`.

    Returns None when `transformers` is unavailable or the model cannot be
    loaded locally. The cache is keyed on the model name; the original
    rebuilt the pipeline on every call, reloading the model each message.
    """
    global _local_gen, _local_gen_model
    if not TRANSFORMERS_AVAILABLE:
        return None
    # Cache hit: same model as last time, reuse the existing pipeline.
    if _local_gen is not None and _local_gen_model == model_name:
        return _local_gen
    try:
        # Try to initialize pipeline for the specific model
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
        _local_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
    except Exception:
        try:
            # Let the pipeline factory resolve the model/tokenizer itself.
            _local_gen = pipeline("text-generation", model=model_name)
        except Exception:
            _local_gen = None
            _local_gen_model = None
            return None
    _local_gen_model = model_name
    return _local_gen
def call_local(prompt: str, model_name: str):
    """Generate a reply with a local transformers pipeline; returns text or an error string."""
    generator = init_local_gen(model_name)
    if generator is None:
        return "[Local generation unavailable — install 'transformers' and ensure the model is available locally.]"
    try:
        # Rough length budget: the prompt's word count plus ~150 new tokens.
        outputs = generator(prompt, max_length=len(prompt.split()) + 150, do_sample=True, top_p=0.95, temperature=0.8, num_return_sequences=1)
    except Exception as e:
        return f"[Local generation failed: {e}]"
    if isinstance(outputs, list) and outputs:
        head = outputs[0]
        if isinstance(head, dict):
            for field in ("generated_text", "text"):
                if isinstance(head.get(field), str):
                    return head[field].strip()
            return str(head)
        if isinstance(head, str):
            return head
    return str(outputs)
# ---------------------
# Conversation prompt builder
# ---------------------
# System persona prepended to every prompt sent to the model (see build_prompt).
SYSTEM_PROMPT = (
    "You are an expert computer technician and systems engineer. "
    "You know practical details about personal computers, servers, operating systems, networking, "
    "hardware troubleshooting, performance tuning, security best practices, software installation and debugging. "
    "When a user asks a question, respond clearly and concisely in English. Provide step-by-step instructions when helpful, "
    "explain risks and trade-offs, and include commands or code snippets if they are useful."
)
def build_prompt(system_prompt: str, history: List[List[str]]) -> str:
    """Serialize the system prompt and chat turns into one text prompt.

    `history` is a list of [user, assistant] pairs; a pending turn has a
    falsy assistant entry and is skipped. The prompt ends with "Assistant:"
    so the model continues in the assistant role.
    """
    lines = [f"System: {system_prompt}", "Conversation:"]
    for user_turn, bot_turn in history:
        lines.append(f"User: {user_turn}")
        if bot_turn:
            lines.append(f"Assistant: {bot_turn}")
    lines.append("Assistant:")
    return "\n".join(lines)
# ---------------------
# Gradio callbacks
# ---------------------
def respond(user_message: str, chat_history, mode: str, selected_model: str, custom_model: str, api_key_input: str, max_tokens: int):
    """Gradio callback: append the user's turn, run inference, fill in the reply.

    Returns (updated_history, "") — the empty string clears the input box.
    """
    if chat_history is None:
        chat_history = []
    # FIX: ignore empty/whitespace-only submissions instead of appending a
    # blank turn and triggering a pointless inference call.
    if not user_message or not user_message.strip():
        return chat_history, ""
    chat_history.append([user_message, None])
    # A non-blank custom model id takes precedence over the dropdown.
    model_to_use = custom_model.strip() if custom_model and custom_model.strip() else selected_model
    token = get_api_token(api_key_input)
    prompt = build_prompt(SYSTEM_PROMPT, chat_history)
    # Choose inference path
    if mode == "HuggingFace (remote)":
        # prefer huggingface_hub wrapper if available, fallback to router
        if HF_HUB_AVAILABLE:
            reply = call_hf_inferenceapi(prompt, model_to_use, token, max_new_tokens=max_tokens)
        else:
            reply = call_hf_router(prompt, model_to_use, token, max_new_tokens=max_tokens)
    else:
        reply = call_local(prompt, model_to_use)
    # Ensure the Chatbot component always receives a string.
    if reply is None:
        reply = ""
    chat_history[-1][1] = str(reply)
    return chat_history, ""
def clear_history():
    """Return an empty chat history, resetting the Chatbot component."""
    return list()
# ---------------------
# Gradio UI
# ---------------------
# Module-level UI construction: components created inside the Blocks context
# are laid out in declaration order, then wired to the callbacks above.
with gr.Blocks(title="AI Computer Expert (multi-model)") as demo:
    gr.Markdown("# AI Computer Expert — Multi-model (Hugging Face)")
    gr.Markdown("Ask anything about computers. Choose a model from the list or type a custom model id. Enter a HF API key (optional) to use remote inference.")
    with gr.Row():
        with gr.Column(scale=3):
            # Main chat column: transcript, input box, and action buttons.
            chatbot = gr.Chatbot(label="AI Computer Expert")
            user_input = gr.Textbox(placeholder="Type your question here (e.g. 'Why is my laptop overheating?')", show_label=False, lines=2)
            with gr.Row():
                send_btn = gr.Button("Send")
                clear_btn = gr.Button("Clear")
        with gr.Column(scale=1):
            # Settings column: inference mode, model selection, credentials, length.
            mode = gr.Radio(choices=["HuggingFace (remote)", "Local (transformers)"], value="HuggingFace (remote)", label="Mode")
            model_dropdown = gr.Dropdown(label="Select model", choices=COMMON_MODELS, value=DEFAULT_MODEL)
            custom_model = gr.Textbox(label="Custom model id (optional)", placeholder="owner/model-name (takes precedence over dropdown)")
            api_key_box = gr.Textbox(label="HuggingFace API Key (optional)", type="password", placeholder="hf_xxx ...")
            max_tokens = gr.Slider(label="Max new tokens", minimum=32, maximum=1024, step=32, value=256)
            gr.Markdown("**Notes:**\n- Some large/gated models require special access or are not hosted for inference. If you see 410/404, try a different model or set up an Inference Endpoint.\n- If you don't want to use remote API, switch to Local and ensure you have the model installed and `transformers` available.")
    # Clickable example questions that populate the input box.
    examples = [
        "My Windows 10 laptop randomly restarts — how do I diagnose this?",
        "How can I speed up boot time on Ubuntu?",
        "Explain how RAID 1 differs from RAID 5 and when to use each.",
        "I get 'kernel panic' on boot, what logs should I check?"
    ]
    gr.Examples(examples=examples, inputs=user_input)
    # Both the Send button and Enter-to-submit route to the same callback.
    send_btn.click(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
    user_input.submit(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
    # NOTE(review): this lambda duplicates the clear_history() helper defined above.
    clear_btn.click(lambda: [], None, chatbot)
    gr.Markdown("---")
    gr.Markdown("*This app supports many HF models; some models may be gated or not available via hosted inference.*")
if __name__ == "__main__":
    # Bind all interfaces; the port comes from the PORT env var (useful for Spaces).
    port = int(os.getenv("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port)