# app.py
import os
import json
import requests
from typing import List, Optional

import gradio as gr

# Optional: huggingface_hub.InferenceApi if installed
try:
    from huggingface_hub import InferenceApi
    HF_HUB_AVAILABLE = True
except Exception:
    HF_HUB_AVAILABLE = False

# Optional local generation support
try:
    from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
    TRANSFORMERS_AVAILABLE = True
except Exception:
    TRANSFORMERS_AVAILABLE = False

# ---------------------
# Config / Model list
# ---------------------
DEFAULT_MODEL = os.getenv("HUGGINGFACE_MODEL", "gpt2")

# A curated list of public models for quick selection (small -> medium -> instruction-tuned)
COMMON_MODELS = [
    "gpt2",
    "distilgpt2",
    "google/flan-t5-small",
    "google/flan-t5-base",
    "google/flan-t5-large",
    "google/flan-t5-xl",
    "facebook/opt-1.3b",
    "facebook/opt-2.7b",
    "bigscience/bloom-560m",
    "bigscience/bloomz-560m",
    "tiiuae/falcon-7b-instruct",  # may be gated
    "mistralai/Mixtral-8x7B-Instruct-v0.1",  # example gated/large
    "stabilityai/stablelm-tuned-alpha-3b",
    "EleutherAI/gpt-neo-2.7B",
    "google/t5-v1_1-base",
    "hf-internal-testing/tiny-random-gpt2",
]

# ---------------------
# Helpers
# ---------------------
def normalize_hf_output(data) -> str:
    """Normalize HF inference output (list/dict/string) to plain text."""
    if data is None:
        return ""
    if isinstance(data, str):
        return data.strip()
    if isinstance(data, list) and len(data) > 0:
        first = data[0]
        if isinstance(first, dict):
            for key in ("generated_text", "text", "content"):
                if key in first and isinstance(first[key], str):
                    return first[key].strip()
            # fallback: join string values
            vals = [str(v) for v in first.values()]
            return " ".join(vals).strip()
        if all(isinstance(x, str) for x in data):
            return "\n".join(data).strip()
        return str(data)
    if isinstance(data, dict):
        for key in ("generated_text", "text", "content"):
            if key in data and isinstance(data[key], str):
                return data[key].strip()
        return json.dumps(data)
    return str(data)


def get_api_token(input_token: Optional[str]) -> Optional[str]:
    """Prefer the UI-provided token, then env vars, else None."""
    if input_token and input_token.strip():
        return input_token.strip()
    return os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")


# ---------------------
# Inference callers
# ---------------------
def call_hf_router(prompt: str, model: str, token: Optional[str],
                   max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    """
    Call the HF router endpoint, which is more future-proof for some hosted models.
    Returns a plain-text response or a helpful error message.
    """
    url = f"https://router.huggingface.co/hf-inference/{model}"
    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature},
    }
    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=60)
    except Exception as e:
        return f"[Request error: {e}]"
    if resp.status_code == 410:
        return ("[Error 410: endpoint gone. This model may not have a hosted inference endpoint or requires gated access. "
                "Try another model or check the model page for access requirements.]")
    if resp.status_code == 404:
        return "[Error 404: model not found. Check the model id or try a different model.]"
    if resp.status_code == 401:
        return "[Error 401: unauthorized. Your API key may be missing or lacking permissions.]"
    if resp.status_code != 200:
        # include limited info about the failure
        try:
            info = resp.json()
        except Exception:
            info = resp.text
        return f"[HF error {resp.status_code}: {info}]"
    try:
        data = resp.json()
    except Exception:
        return resp.text
    return normalize_hf_output(data)


def call_hf_inferenceapi(prompt: str, model: str, token: Optional[str],
                         max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    """Use huggingface_hub.InferenceApi when available; otherwise fall back to the router endpoint."""
    if not HF_HUB_AVAILABLE:
        return call_hf_router(prompt, model, token, max_new_tokens, temperature)
    try:
        api = InferenceApi(repo_id=model, token=token)
        out = api(prompt, params={"max_new_tokens": max_new_tokens, "temperature": temperature})
        return normalize_hf_output(out)
    except Exception:
        # fall back to the router endpoint on any failure
        return call_hf_router(prompt, model, token, max_new_tokens, temperature)


# Local generation fallback
_local_gen = None


def init_local_gen(model_name: str):
    """Lazily build a local text-generation pipeline; returns None if unavailable."""
    global _local_gen
    if not TRANSFORMERS_AVAILABLE:
        return None
    try:
        # Try to initialize a pipeline for the specific model
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
        _local_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
        return _local_gen
    except Exception:
        try:
            # Let the pipeline resolve the model itself as a fallback
            _local_gen = pipeline("text-generation", model=model_name)
            return _local_gen
        except Exception:
            return None


def call_local(prompt: str, model_name: str):
    """Generate a reply with a locally loaded transformers pipeline."""
    gen = init_local_gen(model_name)
    if gen is None:
        return "[Local generation unavailable — install 'transformers' and ensure the model is available locally.]"
    try:
        # max_length is a rough heuristic: prompt word count plus ~150 generated tokens
        out = gen(prompt, max_length=len(prompt.split()) + 150, do_sample=True,
                  top_p=0.95, temperature=0.8, num_return_sequences=1)
        if isinstance(out, list) and len(out) > 0:
            first = out[0]
            if isinstance(first, dict):
                for key in ("generated_text", "text"):
                    if key in first and isinstance(first[key], str):
                        return first[key].strip()
                return str(first)
            if isinstance(first, str):
                return first
        return str(out)
    except Exception as e:
        return f"[Local generation failed: {e}]"


# ---------------------
# Conversation prompt builder
# ---------------------
SYSTEM_PROMPT = (
    "You are an expert computer technician and systems engineer. "
    "You know practical details about personal computers, servers, operating systems, networking, "
    "hardware troubleshooting, performance tuning, security best practices, software installation and debugging. "
    "When a user asks a question, respond clearly and concisely in English. Provide step-by-step instructions when helpful, "
    "explain risks and trade-offs, and include commands or code snippets if they are useful."
)
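
# build_prompt (below) flattens the running chat into one plain-text prompt. For a
# single pending user turn, the result looks like this (illustrative example):
#
#   System: You are an expert computer technician and systems engineer. ...
#   Conversation:
#   User: Why is my laptop overheating?
#   Assistant: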
def build_prompt(system_prompt: str, history: List[List[str]]) -> str:
    parts = [f"System: {system_prompt}", "Conversation:"]
    for user_msg, assistant_msg in history:
        parts.append(f"User: {user_msg}")
        if assistant_msg:
            parts.append(f"Assistant: {assistant_msg}")
    parts.append("Assistant:")
    return "\n".join(parts)


# ---------------------
# Gradio callbacks
# ---------------------
def respond(user_message: str, chat_history, mode: str, selected_model: str,
            custom_model: str, api_key_input: str, max_tokens: int):
    if chat_history is None:
        chat_history = []
    chat_history.append([user_message, None])

    model_to_use = custom_model.strip() if custom_model and custom_model.strip() else selected_model
    token = get_api_token(api_key_input)
    prompt = build_prompt(SYSTEM_PROMPT, chat_history)

    # Choose inference path
    if mode == "HuggingFace (remote)":
        # prefer huggingface_hub wrapper if available, fallback to router
        if HF_HUB_AVAILABLE:
            reply = call_hf_inferenceapi(prompt, model_to_use, token, max_new_tokens=max_tokens)
        else:
            reply = call_hf_router(prompt, model_to_use, token, max_new_tokens=max_tokens)
    else:
        reply = call_local(prompt, model_to_use)

    # Ensure string and safe value
    if reply is None:
        reply = ""
    reply = str(reply)

    chat_history[-1][1] = reply
    return chat_history, ""


def clear_history():
    return []


# ---------------------
# Gradio UI
# ---------------------
with gr.Blocks(title="AI Computer Expert (multi-model)") as demo:
    gr.Markdown("# AI Computer Expert — Multi-model (Hugging Face)")
    gr.Markdown("Ask anything about computers. Choose a model from the list or type a custom model id. Enter a HF API key (optional) to use remote inference.")

    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="AI Computer Expert")
            user_input = gr.Textbox(placeholder="Type your question here (e.g. 'Why is my laptop overheating?')",
                                    show_label=False, lines=2)
            with gr.Row():
                send_btn = gr.Button("Send")
                clear_btn = gr.Button("Clear")
        with gr.Column(scale=1):
            mode = gr.Radio(choices=["HuggingFace (remote)", "Local (transformers)"],
                            value="HuggingFace (remote)", label="Mode")
            model_dropdown = gr.Dropdown(label="Select model", choices=COMMON_MODELS, value=DEFAULT_MODEL)
            custom_model = gr.Textbox(label="Custom model id (optional)",
                                      placeholder="owner/model-name (takes precedence over dropdown)")
            api_key_box = gr.Textbox(label="HuggingFace API Key (optional)", type="password",
                                     placeholder="hf_xxx ...")
            max_tokens = gr.Slider(label="Max new tokens", minimum=32, maximum=1024, step=32, value=256)
            gr.Markdown("**Notes:**\n- Some large/gated models require special access or are not hosted for inference. If you see 410/404, try a different model or set up an Inference Endpoint.\n- If you don't want to use remote API, switch to Local and ensure you have the model installed and `transformers` available.")

    examples = [
        "My Windows 10 laptop randomly restarts — how do I diagnose this?",
        "How can I speed up boot time on Ubuntu?",
        "Explain how RAID 1 differs from RAID 5 and when to use each.",
        "I get 'kernel panic' on boot, what logs should I check?"
    ]
    gr.Examples(examples=examples, inputs=user_input)

    send_btn.click(respond,
                   inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens],
                   outputs=[chatbot, user_input])
    user_input.submit(respond,
                      inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens],
                      outputs=[chatbot, user_input])
    # Reuse clear_history (defined above) so the clear button and the helper stay in sync
    clear_btn.click(clear_history, None, chatbot)

    gr.Markdown("---")
    gr.Markdown("*This app supports many HF models; some models may be gated or not available via hosted inference.*")

if __name__ == "__main__":
    # port can be set with the PORT env var (useful for Spaces)
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))
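
# ---------------------
# Running the app
# ---------------------
# A minimal sketch of a typical local run, assuming the optional dependencies used
# above are installed (for example: pip install gradio requests huggingface_hub transformers):
#
#   export HUGGINGFACEHUB_API_TOKEN=hf_xxx   # optional; enables remote inference without typing a key in the UI
#   python app.py                            # UI is served on port 7860, or $PORT if that env var is set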