# app.py
import os
import json
import requests
from typing import List, Optional

import gradio as gr
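
# Dependencies assumed for this Space (a sketch; pin versions in requirements.txt as needed):
#   gradio, requests           - required
#   huggingface_hub            - optional, enables the InferenceApi path below
#   transformers (+ a backend such as torch) - optional, enables the local generation fallback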

# Optional: huggingface_hub.InferenceApi if installed
try:
    from huggingface_hub import InferenceApi
    HF_HUB_AVAILABLE = True
except Exception:
    HF_HUB_AVAILABLE = False

# Optional local generation support
try:
    from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
    TRANSFORMERS_AVAILABLE = True
except Exception:
    TRANSFORMERS_AVAILABLE = False

# ---------------------
# Config / Model list
# ---------------------
DEFAULT_MODEL = os.getenv("HUGGINGFACE_MODEL", "gpt2")

# A curated list of public models for quick selection (small -> medium -> instruction-tuned)
COMMON_MODELS = [
    "gpt2",
    "distilgpt2",
    "google/flan-t5-small",
    "google/flan-t5-base",
    "google/flan-t5-large",
    "google/flan-t5-xl",
    "facebook/opt-1.3b",
    "facebook/opt-2.7b",
    "bigscience/bloom-560m",
    "bigscience/bloomz-560m",
    "tiiuae/falcon-7b-instruct",  # may be gated
    "mistralai/Mixtral-8x7B-Instruct-v0.1",  # example gated/large
    "stabilityai/stablelm-tuned-alpha-3b",
    "EleutherAI/gpt-neo-2.7B",
    "google/t5-v1_1-base",
    "hf-internal-testing/tiny-random-gpt2",
]

# ---------------------
# Helpers
# ---------------------
def normalize_hf_output(data) -> str:
    """Normalize HF inference output (list/dict/string) to plain text."""
    if data is None:
        return ""
    if isinstance(data, str):
        return data.strip()
    if isinstance(data, list) and len(data) > 0:
        first = data[0]
        if isinstance(first, dict):
            for key in ("generated_text", "text", "content"):
                if key in first and isinstance(first[key], str):
                    return first[key].strip()
            # fallback: join string values
            vals = [str(v) for v in first.values()]
            return " ".join(vals).strip()
        if all(isinstance(x, str) for x in data):
            return "\n".join(data).strip()
        return str(data)
    if isinstance(data, dict):
        for key in ("generated_text", "text", "content"):
            if key in data and isinstance(data[key], str):
                return data[key].strip()
        return json.dumps(data)
    return str(data)
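
# Typical shapes normalize_hf_output handles (illustrative examples based on common HF responses):
#   [{"generated_text": "Hello"}]  -> "Hello"
#   {"generated_text": "Hello"}    -> "Hello"
#   ["line one", "line two"]       -> "line one\nline two"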

def get_api_token(input_token: Optional[str]) -> Optional[str]:
    """Prefer the UI-provided token, then env vars, else None."""
    if input_token and input_token.strip():
        return input_token.strip()
    return os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")

# ---------------------
# Inference callers
# ---------------------
def call_hf_router(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    """
    Call the HF router endpoint, which is more future-proof for some hosted models.
    Returns a plain-text response or a helpful error message.
    """
    # hf-inference provider route on the HF router (note the "/models/" segment)
    url = f"https://router.huggingface.co/hf-inference/models/{model}"
    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature},
    }
    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=60)
    except Exception as e:
        return f"[Request error: {e}]"
    if resp.status_code == 410:
        return ("[Error 410: endpoint gone. This model may not have a hosted inference endpoint or may require gated access. "
                "Try another model or check the model page for access requirements.]")
    if resp.status_code == 404:
        return "[Error 404: model not found. Check the model id or try a different model.]"
    if resp.status_code == 401:
        return "[Error 401: unauthorized. Your API key may be missing or lacking permissions.]"
    if resp.status_code != 200:
        # include limited error info
        try:
            info = resp.json()
        except Exception:
            info = resp.text
        return f"[HF error {resp.status_code}: {info}]"
    try:
        data = resp.json()
    except Exception:
        return resp.text
    return normalize_hf_output(data)

def call_hf_inferenceapi(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    """Use huggingface_hub.InferenceApi when available (wraps different behaviour)."""
    if not HF_HUB_AVAILABLE:
        return call_hf_router(prompt, model, token, max_new_tokens, temperature)
    try:
        api = InferenceApi(repo_id=model, token=token)
        out = api(prompt, params={"max_new_tokens": max_new_tokens, "temperature": temperature})
        return normalize_hf_output(out)
    except Exception:
        # fall back to the router endpoint
        return call_hf_router(prompt, model, token, max_new_tokens, temperature)

# Local generation fallback
_local_gen = None


def init_local_gen(model_name: str):
    global _local_gen
    if not TRANSFORMERS_AVAILABLE:
        return None
    try:
        # Try to initialize a pipeline for the specific model
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
        _local_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
        return _local_gen
    except Exception:
        try:
            _local_gen = pipeline("text-generation", model=model_name)
            return _local_gen
        except Exception:
            return None

def call_local(prompt: str, model_name: str):
    gen = init_local_gen(model_name)
    if gen is None:
        return "[Local generation unavailable — install 'transformers' and ensure the model is available locally.]"
    try:
        out = gen(prompt, max_length=len(prompt.split()) + 150, do_sample=True, top_p=0.95, temperature=0.8, num_return_sequences=1)
        if isinstance(out, list) and len(out) > 0:
            first = out[0]
            if isinstance(first, dict):
                for key in ("generated_text", "text"):
                    if key in first and isinstance(first[key], str):
                        return first[key].strip()
                return str(first)
            if isinstance(first, str):
                return first
        return str(out)
    except Exception as e:
        return f"[Local generation failed: {e}]"

# ---------------------
# Conversation prompt builder
# ---------------------
SYSTEM_PROMPT = (
    "You are an expert computer technician and systems engineer. "
    "You know practical details about personal computers, servers, operating systems, networking, "
    "hardware troubleshooting, performance tuning, security best practices, software installation and debugging. "
    "When a user asks a question, respond clearly and concisely in English. Provide step-by-step instructions when helpful, "
    "explain risks and trade-offs, and include commands or code snippets if they are useful."
)


def build_prompt(system_prompt: str, history: List[List[str]]) -> str:
    parts = [f"System: {system_prompt}", "Conversation:"]
    for user_msg, assistant_msg in history:
        parts.append(f"User: {user_msg}")
        if assistant_msg:
            parts.append(f"Assistant: {assistant_msg}")
    parts.append("Assistant:")
    return "\n".join(parts)
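
# For a one-turn history like [["Why is my PC slow?", None]], build_prompt produces roughly:
#   System: You are an expert computer technician and systems engineer. ...
#   Conversation:
#   User: Why is my PC slow?
#   Assistant:
# The model is expected to continue the text after the trailing "Assistant:" line.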

# ---------------------
# Gradio callbacks
# ---------------------
def respond(user_message: str, chat_history, mode: str, selected_model: str, custom_model: str, api_key_input: str, max_tokens: int):
    if chat_history is None:
        chat_history = []
    chat_history.append([user_message, None])
    model_to_use = custom_model.strip() if custom_model and custom_model.strip() else selected_model
    token = get_api_token(api_key_input)
    prompt = build_prompt(SYSTEM_PROMPT, chat_history)
    # Choose the inference path
    if mode == "HuggingFace (remote)":
        # prefer the huggingface_hub wrapper if available, fall back to the router
        if HF_HUB_AVAILABLE:
            reply = call_hf_inferenceapi(prompt, model_to_use, token, max_new_tokens=max_tokens)
        else:
            reply = call_hf_router(prompt, model_to_use, token, max_new_tokens=max_tokens)
    else:
        reply = call_local(prompt, model_to_use)
    # Ensure a safe string value
    if reply is None:
        reply = ""
    reply = str(reply)
    chat_history[-1][1] = reply
    return chat_history, ""
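
# respond returns (updated history, "") so that, with outputs=[chatbot, user_input] below,
# the Chatbot shows the new turn and the input Textbox is cleared after each send.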

def clear_history():
    return []

# ---------------------
# Gradio UI
# ---------------------
with gr.Blocks(title="AI Computer Expert (multi-model)") as demo:
    gr.Markdown("# AI Computer Expert — Multi-model (Hugging Face)")
    gr.Markdown("Ask anything about computers. Choose a model from the list or type a custom model id. Enter an HF API key (optional) to use remote inference.")
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="AI Computer Expert")
            user_input = gr.Textbox(placeholder="Type your question here (e.g. 'Why is my laptop overheating?')", show_label=False, lines=2)
            with gr.Row():
                send_btn = gr.Button("Send")
                clear_btn = gr.Button("Clear")
        with gr.Column(scale=1):
            mode = gr.Radio(choices=["HuggingFace (remote)", "Local (transformers)"], value="HuggingFace (remote)", label="Mode")
            model_dropdown = gr.Dropdown(label="Select model", choices=COMMON_MODELS, value=DEFAULT_MODEL)
            custom_model = gr.Textbox(label="Custom model id (optional)", placeholder="owner/model-name (takes precedence over dropdown)")
            api_key_box = gr.Textbox(label="HuggingFace API Key (optional)", type="password", placeholder="hf_xxx ...")
            max_tokens = gr.Slider(label="Max new tokens", minimum=32, maximum=1024, step=32, value=256)
            gr.Markdown("**Notes:**\n- Some large/gated models require special access or are not hosted for inference. If you see a 410/404, try a different model or set up an Inference Endpoint.\n- If you don't want to use the remote API, switch to Local and make sure the model is available locally and `transformers` is installed.")
    examples = [
        "My Windows 10 laptop randomly restarts — how do I diagnose this?",
        "How can I speed up boot time on Ubuntu?",
        "Explain how RAID 1 differs from RAID 5 and when to use each.",
        "I get 'kernel panic' on boot, what logs should I check?",
    ]
    gr.Examples(examples=examples, inputs=user_input)

    send_btn.click(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
    user_input.submit(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
    clear_btn.click(clear_history, None, chatbot)
| gr.Markdown("---") | |
| gr.Markdown("*This app supports many HF models; some models may be gated or not available via hosted inference.*") | |

if __name__ == "__main__":
    # The port can be set with the PORT env var (useful for Spaces)
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))
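
# To run outside Spaces (a sketch; assumes the optional dependencies listed near the top are installed):
#   pip install gradio requests huggingface_hub transformers torch
#   python app.py
# The UI is then served at http://localhost:7860 unless PORT overrides it.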