sakuragolden committed
Commit 5425cf5 · verified · 1 parent: e012abb

Update app.py

Files changed (1)
  1. app.py +253 -30
app.py CHANGED
@@ -1,45 +1,268 @@
- import gradio as gr
- import requests
  import os

- # Default HF model
- HUGGINGFACE_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
- API_URL = f"https://api-inference.huggingface.co/models/{HUGGINGFACE_MODEL}"

- # Query HF API
- def query_hf_api(prompt, api_key):
-     if not api_key:
-         return "Error: Please enter your Hugging Face API key."

-     headers = {"Authorization": f"Bearer {api_key}"}
-     payload = {"inputs": prompt}

-     try:
-         response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
-         response.raise_for_status()
-         data = response.json()
-         if isinstance(data, list) and len(data) > 0:
-             return data[0].get("generated_text", "No response.")
          return str(data)
      except Exception as e:
-         return f"API error: {e}"

- # Gradio interface
- def chat(prompt, api_key):
-     return query_hf_api(prompt, api_key)

- with gr.Blocks() as demo:
-     gr.Markdown("# 🧠 AI Computer Expert\nAsk anything about computers!")

-     api_key = gr.Textbox(label="HuggingFace API Key", placeholder="Enter your HF API key")
-     prompt = gr.Textbox(label="Your Question", placeholder="Ask the AI anything about computers...")
-     output = gr.Textbox(label="AI Answer")

-     btn = gr.Button("Ask")
-     btn.click(fn=chat, inputs=[prompt, api_key], outputs=output)

      gr.Markdown("---")
-     gr.Markdown("*This app uses the Hugging Face Inference API. Enter any hosted model key to run.*")

  if __name__ == "__main__":
-     demo.launch()
+ # app.py
  import os
+ import json
+ import requests
+ from typing import List, Optional
+
+ import gradio as gr

+ # Optional: huggingface_hub.InferenceApi if installed
+ try:
+     from huggingface_hub import InferenceApi
+     HF_HUB_AVAILABLE = True
+ except Exception:
+     HF_HUB_AVAILABLE = False

+ # Optional local generation support
+ try:
+     from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+     TRANSFORMERS_AVAILABLE = True
+ except Exception:
+     TRANSFORMERS_AVAILABLE = False

+ # ---------------------
+ # Config / Model list
+ # ---------------------
+ DEFAULT_MODEL = os.getenv("HUGGINGFACE_MODEL", "gpt2")

+ # A curated list of public models for quick selection (small->medium->instruction-tuned)
+ COMMON_MODELS = [
+     "gpt2",
+     "distilgpt2",
+     "google/flan-t5-small",
+     "google/flan-t5-base",
+     "google/flan-t5-large",
+     "google/flan-t5-xl",
+     "facebook/opt-1.3b",
+     "facebook/opt-2.7b",
+     "bigscience/bloom-560m",
+     "bigscience/bloomz-560m",
+     "tiiuae/falcon-7b-instruct",  # may be gated
+     "mistralai/Mixtral-8x7B-Instruct-v0.1",  # example gated/large
+     "stabilityai/stablelm-tuned-alpha-3b",
+     "EleutherAI/gpt-neo-2.7B",
+     "google/t5-v1_1-base",
+     "hf-internal-testing/tiny-random-gpt2"
+ ]
+
+ # ---------------------
+ # Helpers
+ # ---------------------
+ def normalize_hf_output(data) -> str:
+     """Normalize HF inference output (list/dict/string) to plain text."""
+     if data is None:
+         return ""
+     if isinstance(data, str):
+         return data.strip()
+     if isinstance(data, list) and len(data) > 0:
+         first = data[0]
+         if isinstance(first, dict):
+             for key in ("generated_text", "text", "content"):
+                 if key in first and isinstance(first[key], str):
+                     return first[key].strip()
+             # fallback: join string values
+             vals = [str(v) for v in first.values()]
+             return " ".join(vals).strip()
+         if all(isinstance(x, str) for x in data):
+             return "\n".join(data).strip()
          return str(data)
+     if isinstance(data, dict):
+         for key in ("generated_text", "text", "content"):
+             if key in data and isinstance(data[key], str):
+                 return data[key].strip()
+         return json.dumps(data)
+     return str(data)
+
+ def get_api_token(input_token: Optional[str]) -> Optional[str]:
+     """Prefer UI-provided token, then env vars, else None."""
+     if input_token and input_token.strip():
+         return input_token.strip()
+     return os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
+
+ # ---------------------
+ # Inference callers
+ # ---------------------
+ def call_hf_router(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
+     """
+     Call HF router endpoint which is more future-proof for some hosted models.
+     Returns a plain-text response or a helpful error message.
+     """
+     url = f"https://router.huggingface.co/hf-inference/{model}"
+     headers = {"Content-Type": "application/json"}
+     if token:
+         headers["Authorization"] = f"Bearer {token}"
+     payload = {
+         "inputs": prompt,
+         "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}
+     }
+     try:
+         resp = requests.post(url, headers=headers, json=payload, timeout=60)
+     except Exception as e:
+         return f"[Request error: {e}]"
+
+     if resp.status_code == 410:
+         return ("[Error 410: endpoint/gone. This model may not have a hosted inference endpoint or requires gated access. "
+                 "Try another model or check the model page for access requirements.]")
+     if resp.status_code == 404:
+         return "[Error 404: model not found. Check the model id or try a different model.]"
+     if resp.status_code == 401:
+         return "[Error 401: unauthorized. Your API key may be missing or lacking permissions.]"
+     if resp.status_code != 200:
+         # include limited info
+         try:
+             info = resp.json()
+         except Exception:
+             info = resp.text
+         return f"[HF error {resp.status_code}: {info}]"
+
+     try:
+         data = resp.json()
+     except Exception:
+         return resp.text
+     return normalize_hf_output(data)
+
+ def call_hf_inferenceapi(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
+     """Use huggingface_hub.InferenceApi when available (wraps different behaviour)."""
+     if not HF_HUB_AVAILABLE:
+         return call_hf_router(prompt, model, token, max_new_tokens, temperature)
+     try:
+         api = InferenceApi(repo_id=model, token=token)
+         out = api(prompt, params={"max_new_tokens": max_new_tokens, "temperature": temperature})
+         return normalize_hf_output(out)
      except Exception as e:
+         # fallback to router
+         return call_hf_router(prompt, model, token, max_new_tokens, temperature)
+
+ # Local generation fallback
+ _local_gen = None
+ def init_local_gen(model_name: str):
+     global _local_gen
+     if not TRANSFORMERS_AVAILABLE:
+         return None
+     try:
+         # Try to initialize pipeline for the specific model
+         tokenizer = AutoTokenizer.from_pretrained(model_name)
+         model = AutoModelForCausalLM.from_pretrained(model_name)
+         _local_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
+         return _local_gen
+     except Exception:
+         try:
+             _local_gen = pipeline("text-generation", model=model_name)
+             return _local_gen
+         except Exception:
+             return None
+
+ def call_local(prompt: str, model_name: str):
+     gen = init_local_gen(model_name)
+     if gen is None:
+         return "[Local generation unavailable — install 'transformers' and ensure the model is available locally.]"
+     try:
+         out = gen(prompt, max_length=len(prompt.split()) + 150, do_sample=True, top_p=0.95, temperature=0.8, num_return_sequences=1)
+         if isinstance(out, list) and len(out) > 0:
+             first = out[0]
+             if isinstance(first, dict):
+                 for key in ("generated_text", "text"):
+                     if key in first and isinstance(first[key], str):
+                         return first[key].strip()
+                 return str(first)
+             if isinstance(first, str):
+                 return first
+         return str(out)
+     except Exception as e:
+         return f"[Local generation failed: {e}]"
+
+ # ---------------------
+ # Conversation prompt builder
+ # ---------------------
+ SYSTEM_PROMPT = (
+     "You are an expert computer technician and systems engineer. "
+     "You know practical details about personal computers, servers, operating systems, networking, "
+     "hardware troubleshooting, performance tuning, security best practices, software installation and debugging. "
+     "When a user asks a question, respond clearly and concisely in English. Provide step-by-step instructions when helpful, "
+     "explain risks and trade-offs, and include commands or code snippets if they are useful."
+ )
+
+ def build_prompt(system_prompt: str, history: List[List[str]]) -> str:
+     parts = [f"System: {system_prompt}", "Conversation:"]
+     for user_msg, assistant_msg in history:
+         parts.append(f"User: {user_msg}")
+         if assistant_msg:
+             parts.append(f"Assistant: {assistant_msg}")
+     parts.append("Assistant:")
+     return "\n".join(parts)
+
+ # ---------------------
+ # Gradio callbacks
+ # ---------------------
+ def respond(user_message: str, chat_history, mode: str, selected_model: str, custom_model: str, api_key_input: str, max_tokens: int):
+     if chat_history is None:
+         chat_history = []
+     chat_history.append([user_message, None])
+
+     model_to_use = custom_model.strip() if custom_model and custom_model.strip() else selected_model
+     token = get_api_token(api_key_input)
+
+     prompt = build_prompt(SYSTEM_PROMPT, chat_history)
+
+     # Choose inference path
+     if mode == "HuggingFace (remote)":
+         # prefer huggingface_hub wrapper if available, fallback to router
+         if HF_HUB_AVAILABLE:
+             reply = call_hf_inferenceapi(prompt, model_to_use, token, max_new_tokens=max_tokens)
+         else:
+             reply = call_hf_router(prompt, model_to_use, token, max_new_tokens=max_tokens)
+     else:
+         reply = call_local(prompt, model_to_use)
+
+     # Ensure string and safe value
+     if reply is None:
+         reply = ""
+     reply = str(reply)
+
+     chat_history[-1][1] = reply
+     return chat_history, ""
+
+ def clear_history():
+     return []
+
+ # ---------------------
+ # Gradio UI
+ # ---------------------
+ with gr.Blocks(title="AI Computer Expert (multi-model)") as demo:
+     gr.Markdown("# AI Computer Expert — Multi-model (Hugging Face)")
+     gr.Markdown("Ask anything about computers. Choose a model from the list or type a custom model id. Enter a HF API key (optional) to use remote inference.")

+     with gr.Row():
+         with gr.Column(scale=3):
+             chatbot = gr.Chatbot(label="AI Computer Expert")
+             user_input = gr.Textbox(placeholder="Type your question here (e.g. 'Why is my laptop overheating?')", show_label=False, lines=2)
+             with gr.Row():
+                 send_btn = gr.Button("Send")
+                 clear_btn = gr.Button("Clear")
+         with gr.Column(scale=1):
+             mode = gr.Radio(choices=["HuggingFace (remote)", "Local (transformers)"], value="HuggingFace (remote)", label="Mode")
+             model_dropdown = gr.Dropdown(label="Select model", choices=COMMON_MODELS, value=DEFAULT_MODEL)
+             custom_model = gr.Textbox(label="Custom model id (optional)", placeholder="owner/model-name (takes precedence over dropdown)")
+             api_key_box = gr.Textbox(label="HuggingFace API Key (optional)", type="password", placeholder="hf_xxx ...")
+             max_tokens = gr.Slider(label="Max new tokens", minimum=32, maximum=1024, step=32, value=256)

+     gr.Markdown("**Notes:**\n- Some large/gated models require special access or are not hosted for inference. If you see 410/404, try a different model or set up an Inference Endpoint.\n- If you don't want to use remote API, switch to Local and ensure you have the model installed and `transformers` available.")

+     examples = [
+         "My Windows 10 laptop randomly restarts how do I diagnose this?",
+         "How can I speed up boot time on Ubuntu?",
+         "Explain how RAID 1 differs from RAID 5 and when to use each.",
+         "I get 'kernel panic' on boot, what logs should I check?"
+     ]
+     gr.Examples(examples=examples, inputs=user_input)

+     send_btn.click(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
+     user_input.submit(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
+     clear_btn.click(lambda: [], None, chatbot)

      gr.Markdown("---")
+     gr.Markdown("*This app supports many HF models; some models may be gated or not available via hosted inference.*")

  if __name__ == "__main__":
+     # port can be set with PORT env var (useful for Spaces)
+     demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))
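
Reviewer note: a minimal sketch (not part of the commit) for sanity-checking the new normalize_hf_output helper without starting the UI. It assumes the file is importable as `app`; importing builds the Gradio blocks but does not launch them, since launch() is guarded by the __main__ check.

    # sketch: exercise normalize_hf_output with the payload shapes
    # the Inference API commonly returns
    from app import normalize_hf_output

    # list-of-dicts shape, as returned by text-generation endpoints
    assert normalize_hf_output([{"generated_text": " hello "}]) == "hello"
    # plain strings are stripped; dicts fall back to known keys
    assert normalize_hf_output("  raw text ") == "raw text"
    assert normalize_hf_output({"text": "answer"}) == "answer"
    # unknown dicts are JSON-encoded rather than dropped
    print(normalize_hf_output({"score": 0.9}))  # -> {"score": 0.9}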