Spaces:

sakuragolden
/

computerai

Sleeping

App Files Files Community

computerai / app.py

sakuragolden

Update app.py

5425cf5 verified about 2 months ago

raw

history blame contribute delete

11.2 kB

	# app.py
	import os
	import json
	import requests
	from typing import List, Optional

	import gradio as gr

	# Optional: huggingface_hub.InferenceApi if installed
	try:
	from huggingface_hub import InferenceApi
	HF_HUB_AVAILABLE = True
	except Exception:
	HF_HUB_AVAILABLE = False

	# Optional local generation support
	try:
	from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
	TRANSFORMERS_AVAILABLE = True
	except Exception:
	TRANSFORMERS_AVAILABLE = False

	# ---------------------
	# Config / Model list
	# ---------------------
	DEFAULT_MODEL = os.getenv("HUGGINGFACE_MODEL", "gpt2")

	# A curated list of public models for quick selection (small->medium->instruction-tuned)
	COMMON_MODELS = [
	"gpt2",
	"distilgpt2",
	"google/flan-t5-small",
	"google/flan-t5-base",
	"google/flan-t5-large",
	"google/flan-t5-xl",
	"facebook/opt-1.3b",
	"facebook/opt-2.7b",
	"bigscience/bloom-560m",
	"bigscience/bloomz-560m",
	"tiiuae/falcon-7b-instruct", # may be gated
	"mistralai/Mixtral-8x7B-Instruct-v0.1", # example gated/large
	"stabilityai/stablelm-tuned-alpha-3b",
	"EleutherAI/gpt-neo-2.7B",
	"google/t5-v1_1-base",
	"hf-internal-testing/tiny-random-gpt2"
	]

	# ---------------------
	# Helpers
	# ---------------------
	def normalize_hf_output(data) -> str:
	"""Normalize HF inference output (list/dict/string) to plain text."""
	if data is None:
	return ""
	if isinstance(data, str):
	return data.strip()
	if isinstance(data, list) and len(data) > 0:
	first = data[0]
	if isinstance(first, dict):
	for key in ("generated_text", "text", "content"):
	if key in first and isinstance(first[key], str):
	return first[key].strip()
	# fallback: join string values
	vals = [str(v) for v in first.values()]
	return " ".join(vals).strip()
	if all(isinstance(x, str) for x in data):
	return "\n".join(data).strip()
	return str(data)
	if isinstance(data, dict):
	for key in ("generated_text", "text", "content"):
	if key in data and isinstance(data[key], str):
	return data[key].strip()
	return json.dumps(data)
	return str(data)

	def get_api_token(input_token: Optional[str]) -> Optional[str]:
	"""Prefer UI-provided token, then env vars, else None."""
	if input_token and input_token.strip():
	return input_token.strip()
	return os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")

	# ---------------------
	# Inference callers
	# ---------------------
	def call_hf_router(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
	"""
	Call HF router endpoint which is more future-proof for some hosted models.
	Returns a plain-text response or a helpful error message.
	"""
	url = f"https://router.huggingface.co/hf-inference/{model}"
	headers = {"Content-Type": "application/json"}
	if token:
	headers["Authorization"] = f"Bearer {token}"
	payload = {
	"inputs": prompt,
	"parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}
	}
	try:
	resp = requests.post(url, headers=headers, json=payload, timeout=60)
	except Exception as e:
	return f"[Request error: {e}]"

	if resp.status_code == 410:
	return ("[Error 410: endpoint/gone. This model may not have a hosted inference endpoint or requires gated access. "
	"Try another model or check the model page for access requirements.]")
	if resp.status_code == 404:
	return "[Error 404: model not found. Check the model id or try a different model.]"
	if resp.status_code == 401:
	return "[Error 401: unauthorized. Your API key may be missing or lacking permissions.]"
	if resp.status_code != 200:
	# include limited info
	try:
	info = resp.json()
	except Exception:
	info = resp.text
	return f"[HF error {resp.status_code}: {info}]"

	try:
	data = resp.json()
	except Exception:
	return resp.text
	return normalize_hf_output(data)

	def call_hf_inferenceapi(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
	"""Use huggingface_hub.InferenceApi when available (wraps different behaviour)."""
	if not HF_HUB_AVAILABLE:
	return call_hf_router(prompt, model, token, max_new_tokens, temperature)
	try:
	api = InferenceApi(repo_id=model, token=token)
	out = api(prompt, params={"max_new_tokens": max_new_tokens, "temperature": temperature})
	return normalize_hf_output(out)
	except Exception as e:
	# fallback to router
	return call_hf_router(prompt, model, token, max_new_tokens, temperature)

	# Local generation fallback
	_local_gen = None
	def init_local_gen(model_name: str):
	global _local_gen
	if not TRANSFORMERS_AVAILABLE:
	return None
	try:
	# Try to initialize pipeline for the specific model
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(model_name)
	_local_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
	return _local_gen
	except Exception:
	try:
	_local_gen = pipeline("text-generation", model=model_name)
	return _local_gen
	except Exception:
	return None

	def call_local(prompt: str, model_name: str):
	gen = init_local_gen(model_name)
	if gen is None:
	return "[Local generation unavailable — install 'transformers' and ensure the model is available locally.]"
	try:
	out = gen(prompt, max_length=len(prompt.split()) + 150, do_sample=True, top_p=0.95, temperature=0.8, num_return_sequences=1)
	if isinstance(out, list) and len(out) > 0:
	first = out[0]
	if isinstance(first, dict):
	for key in ("generated_text", "text"):
	if key in first and isinstance(first[key], str):
	return first[key].strip()
	return str(first)
	if isinstance(first, str):
	return first
	return str(out)
	except Exception as e:
	return f"[Local generation failed: {e}]"

	# ---------------------
	# Conversation prompt builder
	# ---------------------
	SYSTEM_PROMPT = (
	"You are an expert computer technician and systems engineer. "
	"You know practical details about personal computers, servers, operating systems, networking, "
	"hardware troubleshooting, performance tuning, security best practices, software installation and debugging. "
	"When a user asks a question, respond clearly and concisely in English. Provide step-by-step instructions when helpful, "
	"explain risks and trade-offs, and include commands or code snippets if they are useful."
	)

	def build_prompt(system_prompt: str, history: List[List[str]]) -> str:
	parts = [f"System: {system_prompt}", "Conversation:"]
	for user_msg, assistant_msg in history:
	parts.append(f"User: {user_msg}")
	if assistant_msg:
	parts.append(f"Assistant: {assistant_msg}")
	parts.append("Assistant:")
	return "\n".join(parts)

	# ---------------------
	# Gradio callbacks
	# ---------------------
	def respond(user_message: str, chat_history, mode: str, selected_model: str, custom_model: str, api_key_input: str, max_tokens: int):
	if chat_history is None:
	chat_history = []
	chat_history.append([user_message, None])

	model_to_use = custom_model.strip() if custom_model and custom_model.strip() else selected_model
	token = get_api_token(api_key_input)

	prompt = build_prompt(SYSTEM_PROMPT, chat_history)

	# Choose inference path
	if mode == "HuggingFace (remote)":
	# prefer huggingface_hub wrapper if available, fallback to router
	if HF_HUB_AVAILABLE:
	reply = call_hf_inferenceapi(prompt, model_to_use, token, max_new_tokens=max_tokens)
	else:
	reply = call_hf_router(prompt, model_to_use, token, max_new_tokens=max_tokens)
	else:
	reply = call_local(prompt, model_to_use)

	# Ensure string and safe value
	if reply is None:
	reply = ""
	reply = str(reply)

	chat_history[-1][1] = reply
	return chat_history, ""

	def clear_history():
	return []

	# ---------------------
	# Gradio UI
	# ---------------------
	with gr.Blocks(title="AI Computer Expert (multi-model)") as demo:
	gr.Markdown("# AI Computer Expert — Multi-model (Hugging Face)")
	gr.Markdown("Ask anything about computers. Choose a model from the list or type a custom model id. Enter a HF API key (optional) to use remote inference.")

	with gr.Row():
	with gr.Column(scale=3):
	chatbot = gr.Chatbot(label="AI Computer Expert")
	user_input = gr.Textbox(placeholder="Type your question here (e.g. 'Why is my laptop overheating?')", show_label=False, lines=2)
	with gr.Row():
	send_btn = gr.Button("Send")
	clear_btn = gr.Button("Clear")
	with gr.Column(scale=1):
	mode = gr.Radio(choices=["HuggingFace (remote)", "Local (transformers)"], value="HuggingFace (remote)", label="Mode")
	model_dropdown = gr.Dropdown(label="Select model", choices=COMMON_MODELS, value=DEFAULT_MODEL)
	custom_model = gr.Textbox(label="Custom model id (optional)", placeholder="owner/model-name (takes precedence over dropdown)")
	api_key_box = gr.Textbox(label="HuggingFace API Key (optional)", type="password", placeholder="hf_xxx ...")
	max_tokens = gr.Slider(label="Max new tokens", minimum=32, maximum=1024, step=32, value=256)

	gr.Markdown("Notes:\n- Some large/gated models require special access or are not hosted for inference. If you see 410/404, try a different model or set up an Inference Endpoint.\n- If you don't want to use remote API, switch to Local and ensure you have the model installed and `transformers` available.")

	examples = [
	"My Windows 10 laptop randomly restarts — how do I diagnose this?",
	"How can I speed up boot time on Ubuntu?",
	"Explain how RAID 1 differs from RAID 5 and when to use each.",
	"I get 'kernel panic' on boot, what logs should I check?"
	]
	gr.Examples(examples=examples, inputs=user_input)

	send_btn.click(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
	user_input.submit(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
	clear_btn.click(lambda: [], None, chatbot)

	gr.Markdown("---")
	gr.Markdown("This app supports many HF models; some models may be gated or not available via hosted inference.")

	if __name__ == "__main__":
	# port can be set with PORT env var (useful for Spaces)
	demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))