File size: 11,183 Bytes
5425cf5
e012abb
5425cf5
 
 
 
 
43bf615
5425cf5
 
 
 
 
 
43bf615
5425cf5
 
 
 
 
 
43bf615
5425cf5
 
 
 
43bf615
5425cf5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c4fe7a
5425cf5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43bf615
5425cf5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43bf615
5425cf5
 
 
 
 
 
 
 
 
 
 
 
 
43bf615
5425cf5
43bf615
5425cf5
 
 
 
 
 
 
463dc44
5425cf5
 
 
4d185a1
e012abb
5425cf5
4d185a1
43bf615
5425cf5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
# app.py
import os
import json
import requests
from typing import List, Optional

import gradio as gr

# Optional: huggingface_hub.InferenceApi if installed
# (broad except so a broken/partial install degrades to the router path)
try:
    from huggingface_hub import InferenceApi
    HF_HUB_AVAILABLE = True
except Exception:
    HF_HUB_AVAILABLE = False

# Optional local generation support
# (transformers is heavy; absence simply disables the "Local" mode)
try:
    from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
    TRANSFORMERS_AVAILABLE = True
except Exception:
    TRANSFORMERS_AVAILABLE = False

# ---------------------
# Config / Model list
# ---------------------
# Default model id; override with the HUGGINGFACE_MODEL env var.
DEFAULT_MODEL = os.getenv("HUGGINGFACE_MODEL", "gpt2")

# A curated list of public models for quick selection (small->medium->instruction-tuned)
# NOTE(review): availability of hosted inference varies per model — some entries
# below may be gated or unhosted; the UI surfaces 404/410 errors for those.
COMMON_MODELS = [
    "gpt2",
    "distilgpt2",
    "google/flan-t5-small",
    "google/flan-t5-base",
    "google/flan-t5-large",
    "google/flan-t5-xl",
    "facebook/opt-1.3b",
    "facebook/opt-2.7b",
    "bigscience/bloom-560m",
    "bigscience/bloomz-560m",
    "tiiuae/falcon-7b-instruct",          # may be gated
    "mistralai/Mixtral-8x7B-Instruct-v0.1", # example gated/large
    "stabilityai/stablelm-tuned-alpha-3b",
    "EleutherAI/gpt-neo-2.7B",
    "google/t5-v1_1-base",
    "hf-internal-testing/tiny-random-gpt2"
]

# ---------------------
# Helpers
# ---------------------
def normalize_hf_output(data) -> str:
    """Coerce a HF inference response (str, list, or dict) into plain text."""
    if data is None:
        return ""
    if isinstance(data, str):
        return data.strip()
    if isinstance(data, list) and data:
        head = data[0]
        if isinstance(head, dict):
            # Prefer the conventional text-bearing keys used by HF pipelines.
            for field in ("generated_text", "text", "content"):
                value = head.get(field)
                if isinstance(value, str):
                    return value.strip()
            # No known key: concatenate stringified values as a last resort.
            return " ".join(str(v) for v in head.values()).strip()
        if all(isinstance(item, str) for item in data):
            return "\n".join(data).strip()
        return str(data)
    if isinstance(data, dict):
        for field in ("generated_text", "text", "content"):
            value = data.get(field)
            if isinstance(value, str):
                return value.strip()
        return json.dumps(data)
    return str(data)

def get_api_token(input_token: Optional[str]) -> Optional[str]:
    """Resolve the HF API token: UI-provided value wins, then env vars, else None."""
    token = (input_token or "").strip()
    if token:
        return token
    # Fall back to either of the commonly used environment variable names.
    return os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")

# ---------------------
# Inference callers
# ---------------------
def call_hf_router(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    """
    Call the Hugging Face router inference endpoint for `model` and return text.

    Args:
        prompt: Full prompt text, sent as the "inputs" field.
        model: Model repo id (e.g. "gpt2" or "owner/model-name").
        token: Optional HF API token; sent as a Bearer header when present.
        max_new_tokens: Cap on generated tokens, forwarded in "parameters".
        temperature: Sampling temperature, forwarded in "parameters".

    Returns:
        The generated text, or a bracketed human-readable error message.
        This function never raises; all failures are returned as strings so
        the chat UI can display them directly.
    """
    # Fix: the hf-inference route on the router requires a "models/" path
    # segment before the repo id; without it requests fail with 404.
    url = f"https://router.huggingface.co/hf-inference/models/{model}"
    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}
    }
    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=60)
    except Exception as e:
        return f"[Request error: {e}]"

    # Map the common failure codes to actionable messages.
    if resp.status_code == 410:
        return ("[Error 410: endpoint/gone. This model may not have a hosted inference endpoint or requires gated access. "
                "Try another model or check the model page for access requirements.]")
    if resp.status_code == 404:
        return "[Error 404: model not found. Check the model id or try a different model.]"
    if resp.status_code == 401:
        return "[Error 401: unauthorized. Your API key may be missing or lacking permissions.]"
    if resp.status_code != 200:
        # include limited info (JSON body when parseable, raw text otherwise)
        try:
            info = resp.json()
        except Exception:
            info = resp.text
        return f"[HF error {resp.status_code}: {info}]"

    try:
        data = resp.json()
    except Exception:
        return resp.text
    return normalize_hf_output(data)

def call_hf_inferenceapi(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    """Run inference via huggingface_hub.InferenceApi, falling back to the router.

    When huggingface_hub is not installed, or the wrapped call fails for any
    reason (deprecated API, network error, gated model), the request is retried
    through `call_hf_router` with identical arguments.
    """
    if not HF_HUB_AVAILABLE:
        return call_hf_router(prompt, model, token, max_new_tokens, temperature)
    try:
        api = InferenceApi(repo_id=model, token=token)
        out = api(prompt, params={"max_new_tokens": max_new_tokens, "temperature": temperature})
        return normalize_hf_output(out)
    except Exception:
        # Fix: the exception was previously bound to an unused name `e`.
        # Any failure here falls back to the router path.
        return call_hf_router(prompt, model, token, max_new_tokens, temperature)

# Local generation fallback: cache the pipeline AND the model name it was
# built for, so repeated calls don't reload model weights on every message.
_local_gen = None
_local_gen_model = None

def init_local_gen(model_name: str):
    """Return a cached text-generation pipeline for `model_name`, or None.

    Fix: the previous version rebuilt the pipeline on every call, reloading
    model weights for each chat message. The pipeline is now reused while the
    requested model name is unchanged; switching models rebuilds it.
    Returns None when transformers is unavailable or the model cannot load.
    """
    global _local_gen, _local_gen_model
    if not TRANSFORMERS_AVAILABLE:
        return None
    if _local_gen is not None and _local_gen_model == model_name:
        return _local_gen
    try:
        # Try to initialize the pipeline with explicit tokenizer/model objects.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
        _local_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
    except Exception:
        try:
            # Fallback: let the pipeline factory resolve the architecture itself.
            _local_gen = pipeline("text-generation", model=model_name)
        except Exception:
            return None
    _local_gen_model = model_name
    return _local_gen

def call_local(prompt: str, model_name: str):
    """Generate a reply locally with transformers; return text or an error string."""
    pipe = init_local_gen(model_name)
    if pipe is None:
        return "[Local generation unavailable — install 'transformers' and ensure the model is available locally.]"
    try:
        results = pipe(
            prompt,
            max_length=len(prompt.split()) + 150,
            do_sample=True,
            top_p=0.95,
            temperature=0.8,
            num_return_sequences=1,
        )
    except Exception as e:
        return f"[Local generation failed: {e}]"
    # Pipelines usually return a list of dicts with a "generated_text" key.
    if isinstance(results, list) and results:
        head = results[0]
        if isinstance(head, dict):
            for field in ("generated_text", "text"):
                text = head.get(field)
                if isinstance(text, str):
                    return text.strip()
            return str(head)
        if isinstance(head, str):
            return head
    return str(results)

# ---------------------
# Conversation prompt builder
# ---------------------
# Persona/system instructions prepended to every prompt sent to the model.
SYSTEM_PROMPT = (
    "You are an expert computer technician and systems engineer. "
    "You know practical details about personal computers, servers, operating systems, networking, "
    "hardware troubleshooting, performance tuning, security best practices, software installation and debugging. "
    "When a user asks a question, respond clearly and concisely in English. Provide step-by-step instructions when helpful, "
    "explain risks and trade-offs, and include commands or code snippets if they are useful."
)

def build_prompt(system_prompt: str, history: List[List[str]]) -> str:
    """Flatten the system prompt and chat history into one newline-joined prompt.

    The prompt ends with a dangling "Assistant:" line so the model continues
    the conversation in the assistant's voice. Pending turns (assistant reply
    still None/empty) contribute only their User line.
    """
    chunks = [f"System: {system_prompt}", "Conversation:"]
    for user_turn, assistant_turn in history:
        chunks.append(f"User: {user_turn}")
        if assistant_turn:
            chunks.append(f"Assistant: {assistant_turn}")
    chunks.append("Assistant:")
    return "\n".join(chunks)

# ---------------------
# Gradio callbacks
# ---------------------
def respond(user_message: str, chat_history, mode: str, selected_model: str, custom_model: str, api_key_input: str, max_tokens: int):
    """Gradio callback: append the user turn, run inference, fill in the reply.

    Returns the updated history (list of [user, assistant] pairs) plus an
    empty string to clear the input textbox.
    """
    history = chat_history if chat_history is not None else []
    history.append([user_message, None])

    # A non-blank custom model id overrides the dropdown selection.
    chosen_model = custom_model.strip() if custom_model and custom_model.strip() else selected_model
    token = get_api_token(api_key_input)
    prompt = build_prompt(SYSTEM_PROMPT, history)

    # Choose inference path: remote (hub wrapper when installed, else raw
    # router call) or local transformers.
    if mode == "HuggingFace (remote)":
        caller = call_hf_inferenceapi if HF_HUB_AVAILABLE else call_hf_router
        reply = caller(prompt, chosen_model, token, max_new_tokens=max_tokens)
    else:
        reply = call_local(prompt, chosen_model)

    # Normalize to a safe string before handing back to the Chatbot widget.
    history[-1][1] = "" if reply is None else str(reply)
    return history, ""

def clear_history():
    """Return a fresh, empty chat history list."""
    return list()

# ---------------------
# Gradio UI
# ---------------------
# Build the Gradio UI: chat transcript on the left, inference settings on the right.
with gr.Blocks(title="AI Computer Expert (multi-model)") as demo:
    gr.Markdown("# AI Computer Expert — Multi-model (Hugging Face)")
    gr.Markdown("Ask anything about computers. Choose a model from the list or type a custom model id. Enter a HF API key (optional) to use remote inference.")

    with gr.Row():
        # Left column: chat transcript plus input controls.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="AI Computer Expert")
            user_input = gr.Textbox(placeholder="Type your question here (e.g. 'Why is my laptop overheating?')", show_label=False, lines=2)
            with gr.Row():
                send_btn = gr.Button("Send")
                clear_btn = gr.Button("Clear")
        # Right column: inference settings (mode, model, API token, length cap).
        with gr.Column(scale=1):
            mode = gr.Radio(choices=["HuggingFace (remote)", "Local (transformers)"], value="HuggingFace (remote)", label="Mode")
            model_dropdown = gr.Dropdown(label="Select model", choices=COMMON_MODELS, value=DEFAULT_MODEL)
            custom_model = gr.Textbox(label="Custom model id (optional)", placeholder="owner/model-name (takes precedence over dropdown)")
            api_key_box = gr.Textbox(label="HuggingFace API Key (optional)", type="password", placeholder="hf_xxx ...")
            max_tokens = gr.Slider(label="Max new tokens", minimum=32, maximum=1024, step=32, value=256)

            gr.Markdown("**Notes:**\n- Some large/gated models require special access or are not hosted for inference. If you see 410/404, try a different model or set up an Inference Endpoint.\n- If you don't want to use remote API, switch to Local and ensure you have the model installed and `transformers` available.")

    # Canned example questions the user can click to prefill the input box.
    examples = [
        "My Windows 10 laptop randomly restarts — how do I diagnose this?",
        "How can I speed up boot time on Ubuntu?",
        "Explain how RAID 1 differs from RAID 5 and when to use each.",
        "I get 'kernel panic' on boot, what logs should I check?"
    ]
    gr.Examples(examples=examples, inputs=user_input)

    # Both the Send button and pressing Enter in the textbox submit the message.
    send_btn.click(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
    user_input.submit(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
    # Clear resets the transcript; the lambda supplies an empty history.
    clear_btn.click(lambda: [], None, chatbot)

    gr.Markdown("---")
    gr.Markdown("*This app supports many HF models; some models may be gated or not available via hosted inference.*")

if __name__ == "__main__":
    # port can be set with PORT env var (useful for Spaces)
    # Bind to all interfaces so the app is reachable from inside containers.
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))