# app.py
import os
import json
import requests
from typing import List, Optional
import gradio as gr
# Optional: huggingface_hub.InferenceApi if installed
try:
    from huggingface_hub import InferenceApi
    HF_HUB_AVAILABLE = True
except Exception:
    HF_HUB_AVAILABLE = False
# Optional local generation support
try:
    from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
    TRANSFORMERS_AVAILABLE = True
except Exception:
    TRANSFORMERS_AVAILABLE = False
# ---------------------
# Config / Model list
# ---------------------
DEFAULT_MODEL = os.getenv("HUGGINGFACE_MODEL", "gpt2")
# A curated list of public models for quick selection (small->medium->instruction-tuned)
COMMON_MODELS = [
    "gpt2",
    "distilgpt2",
    "google/flan-t5-small",
    "google/flan-t5-base",
    "google/flan-t5-large",
    "google/flan-t5-xl",
    "facebook/opt-1.3b",
    "facebook/opt-2.7b",
    "bigscience/bloom-560m",
    "bigscience/bloomz-560m",
    "tiiuae/falcon-7b-instruct",  # may be gated
    "mistralai/Mixtral-8x7B-Instruct-v0.1",  # example gated/large
    "stabilityai/stablelm-tuned-alpha-3b",
    "EleutherAI/gpt-neo-2.7B",
    "google/t5-v1_1-base",
    "hf-internal-testing/tiny-random-gpt2"
]
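# Note: the flan-t5 / t5-v1_1 entries above are seq2seq (text2text) models. The local
# path below loads AutoModelForCausalLM with a "text-generation" pipeline, so those
# entries are only expected to work in remote mode; local mode suits causal models
# such as gpt2, distilgpt2, or the opt variants.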
# ---------------------
# Helpers
# ---------------------
def normalize_hf_output(data) -> str:
    """Normalize HF inference output (list/dict/string) to plain text."""
    if data is None:
        return ""
    if isinstance(data, str):
        return data.strip()
    if isinstance(data, list) and len(data) > 0:
        first = data[0]
        if isinstance(first, dict):
            for key in ("generated_text", "text", "content"):
                if key in first and isinstance(first[key], str):
                    return first[key].strip()
            # fallback: join string values
            vals = [str(v) for v in first.values()]
            return " ".join(vals).strip()
        if all(isinstance(x, str) for x in data):
            return "\n".join(data).strip()
        return str(data)
    if isinstance(data, dict):
        for key in ("generated_text", "text", "content"):
            if key in data and isinstance(data[key], str):
                return data[key].strip()
        return json.dumps(data)
    return str(data)
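# Illustrative input shapes this normalizer handles (example values are made up):
#   [{"generated_text": "Hello"}]   -> "Hello"
#   {"generated_text": "Hello"}     -> "Hello"
#   ["line one", "line two"]        -> "line one\nline two"
#   {"score": 0.9}                  -> '{"score": 0.9}'  (JSON-dump fallback)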
def get_api_token(input_token: Optional[str]) -> Optional[str]:
    """Prefer the UI-provided token, then env vars, else None."""
    if input_token and input_token.strip():
        return input_token.strip()
    return os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
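# Resolution order, illustratively: a token typed into the UI wins; otherwise
# HUGGINGFACEHUB_API_TOKEN, then HF_TOKEN, is read from the environment, e.g.
#   export HUGGINGFACEHUB_API_TOKEN=hf_xxx   # then leave the UI field blank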
# ---------------------
# Inference callers
# ---------------------
def call_hf_router(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    """
    Call the HF Inference router endpoint, which is more future-proof for some hosted models.
    Returns a plain-text response or a helpful error message.
    """
    # The hf-inference provider route expects a /models/ segment before the model id.
    url = f"https://router.huggingface.co/hf-inference/models/{model}"
    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}
    }
    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=60)
    except Exception as e:
        return f"[Request error: {e}]"
    if resp.status_code == 410:
        return ("[Error 410: endpoint gone. This model may not have a hosted inference endpoint or requires gated access. "
                "Try another model or check the model page for access requirements.]")
    if resp.status_code == 404:
        return "[Error 404: model not found. Check the model id or try a different model.]"
    if resp.status_code == 401:
        return "[Error 401: unauthorized. Your API key may be missing or lacking permissions.]"
    if resp.status_code != 200:
        # include limited info
        try:
            info = resp.json()
        except Exception:
            info = resp.text
        return f"[HF error {resp.status_code}: {info}]"
    try:
        data = resp.json()
    except Exception:
        return resp.text
    return normalize_hf_output(data)
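# Minimal usage sketch (assumes the model is currently hosted and any token is in HF_TOKEN):
#   print(call_hf_router("Explain what RAID 1 is.", "gpt2", os.getenv("HF_TOKEN")))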
def call_hf_inferenceapi(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    """Use huggingface_hub.InferenceApi when available (it wraps differing model behaviours)."""
    if not HF_HUB_AVAILABLE:
        return call_hf_router(prompt, model, token, max_new_tokens, temperature)
    try:
        api = InferenceApi(repo_id=model, token=token)
        out = api(prompt, params={"max_new_tokens": max_new_tokens, "temperature": temperature})
        return normalize_hf_output(out)
    except Exception:
        # fall back to the router on any failure
        return call_hf_router(prompt, model, token, max_new_tokens, temperature)
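# Note: InferenceApi is deprecated in recent huggingface_hub releases in favor of
# InferenceClient. A rough equivalent, untested here:
#   from huggingface_hub import InferenceClient
#   client = InferenceClient(model=model, token=token)
#   text = client.text_generation(prompt, max_new_tokens=max_new_tokens, temperature=temperature)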
# Local generation fallback
_local_gen = None
def init_local_gen(model_name: str):
    global _local_gen
    if not TRANSFORMERS_AVAILABLE:
        return None
    try:
        # Try to initialize a pipeline for the specific model
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
        _local_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
        return _local_gen
    except Exception:
        try:
            _local_gen = pipeline("text-generation", model=model_name)
            return _local_gen
        except Exception:
            return None
def call_local(prompt: str, model_name: str):
    gen = init_local_gen(model_name)
    if gen is None:
        return "[Local generation unavailable — install 'transformers' and ensure the model is available locally.]"
    try:
        # max_new_tokens avoids the word/token mismatch of estimating max_length from prompt words
        out = gen(prompt, max_new_tokens=150, do_sample=True, top_p=0.95, temperature=0.8, num_return_sequences=1)
        if isinstance(out, list) and len(out) > 0:
            first = out[0]
            if isinstance(first, dict):
                for key in ("generated_text", "text"):
                    if key in first and isinstance(first[key], str):
                        return first[key].strip()
                return str(first)
            if isinstance(first, str):
                return first
        return str(out)
    except Exception as e:
        return f"[Local generation failed: {e}]"
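# Usage sketch (downloads the model on first call; assumes 'transformers' and a causal LM):
#   print(call_local("Why is my laptop overheating?", "distilgpt2"))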
# ---------------------
# Conversation prompt builder
# ---------------------
SYSTEM_PROMPT = (
    "You are an expert computer technician and systems engineer. "
    "You know practical details about personal computers, servers, operating systems, networking, "
    "hardware troubleshooting, performance tuning, security best practices, software installation and debugging. "
    "When a user asks a question, respond clearly and concisely in English. Provide step-by-step instructions when helpful, "
    "explain risks and trade-offs, and include commands or code snippets if they are useful."
)
def build_prompt(system_prompt: str, history: List[List[str]]) -> str:
    parts = [f"System: {system_prompt}", "Conversation:"]
    for user_msg, assistant_msg in history:
        parts.append(f"User: {user_msg}")
        if assistant_msg:
            parts.append(f"Assistant: {assistant_msg}")
    parts.append("Assistant:")
    return "\n".join(parts)
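# Example: build_prompt(SYSTEM_PROMPT, [["Hi", None]]) renders roughly as:
#   System: You are an expert computer technician ...
#   Conversation:
#   User: Hi
#   Assistant: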
# ---------------------
# Gradio callbacks
# ---------------------
def respond(user_message: str, chat_history, mode: str, selected_model: str, custom_model: str, api_key_input: str, max_tokens: int):
    if chat_history is None:
        chat_history = []
    chat_history.append([user_message, None])
    model_to_use = custom_model.strip() if custom_model and custom_model.strip() else selected_model
    token = get_api_token(api_key_input)
    prompt = build_prompt(SYSTEM_PROMPT, chat_history)
    # Choose inference path
    if mode == "HuggingFace (remote)":
        # prefer the huggingface_hub wrapper if available, fall back to the router
        if HF_HUB_AVAILABLE:
            reply = call_hf_inferenceapi(prompt, model_to_use, token, max_new_tokens=max_tokens)
        else:
            reply = call_hf_router(prompt, model_to_use, token, max_new_tokens=max_tokens)
    else:
        reply = call_local(prompt, model_to_use)
    # Ensure the reply is a safe string value
    if reply is None:
        reply = ""
    reply = str(reply)
    chat_history[-1][1] = reply
    return chat_history, ""

def clear_history():
    return []
# ---------------------
# Gradio UI
# ---------------------
with gr.Blocks(title="AI Computer Expert (multi-model)") as demo:
    gr.Markdown("# AI Computer Expert — Multi-model (Hugging Face)")
    gr.Markdown("Ask anything about computers. Choose a model from the list or type a custom model id. Enter an HF API key (optional) to use remote inference.")
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="AI Computer Expert")
            user_input = gr.Textbox(placeholder="Type your question here (e.g. 'Why is my laptop overheating?')", show_label=False, lines=2)
            with gr.Row():
                send_btn = gr.Button("Send")
                clear_btn = gr.Button("Clear")
        with gr.Column(scale=1):
            mode = gr.Radio(choices=["HuggingFace (remote)", "Local (transformers)"], value="HuggingFace (remote)", label="Mode")
            model_dropdown = gr.Dropdown(label="Select model", choices=COMMON_MODELS, value=DEFAULT_MODEL)
            custom_model = gr.Textbox(label="Custom model id (optional)", placeholder="owner/model-name (takes precedence over dropdown)")
            api_key_box = gr.Textbox(label="HuggingFace API Key (optional)", type="password", placeholder="hf_xxx ...")
            max_tokens = gr.Slider(label="Max new tokens", minimum=32, maximum=1024, step=32, value=256)
            gr.Markdown("**Notes:**\n- Some large/gated models require special access or are not hosted for inference. If you see a 410/404, try a different model or set up an Inference Endpoint.\n- If you don't want to use the remote API, switch to Local and ensure the model is installed and `transformers` is available.")
    examples = [
        "My Windows 10 laptop randomly restarts — how do I diagnose this?",
        "How can I speed up boot time on Ubuntu?",
        "Explain how RAID 1 differs from RAID 5 and when to use each.",
        "I get 'kernel panic' on boot, what logs should I check?"
    ]
    gr.Examples(examples=examples, inputs=user_input)
    send_btn.click(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
    user_input.submit(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
    clear_btn.click(clear_history, None, chatbot)
    gr.Markdown("---")
    gr.Markdown("*This app supports many HF models; some models may be gated or unavailable via hosted inference.*")
if __name__ == "__main__":
    # the port can be set with the PORT env var (useful for Spaces)
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))