sakuragolden committed
Commit 5425cf5 · verified · 1 parent: e012abb

Update app.py

Files changed (1)
  1. app.py +253 -30
app.py CHANGED
@@ -1,45 +1,268 @@
- import gradio as gr
- import requests
  import os

- # Default HF model
- HUGGINGFACE_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
- API_URL = f"https://api-inference.huggingface.co/models/{HUGGINGFACE_MODEL}"

- # Query HF API
- def query_hf_api(prompt, api_key):
-     if not api_key:
-         return "Error: Please enter your Hugging Face API key."

-     headers = {"Authorization": f"Bearer {api_key}"}
-     payload = {"inputs": prompt}

-     try:
-         response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
-         response.raise_for_status()
-         data = response.json()
-         if isinstance(data, list) and len(data) > 0:
-             return data[0].get("generated_text", "No response.")
          return str(data)
      except Exception as e:
-         return f"API error: {e}"

- # Gradio interface
- def chat(prompt, api_key):
-     return query_hf_api(prompt, api_key)

- with gr.Blocks() as demo:
-     gr.Markdown("# 🧠 AI Computer Expert\nAsk anything about computers!")

-     api_key = gr.Textbox(label="HuggingFace API Key", placeholder="Enter your HF API key")
-     prompt = gr.Textbox(label="Your Question", placeholder="Ask the AI anything about computers...")
-     output = gr.Textbox(label="AI Answer")

-     btn = gr.Button("Ask")
-     btn.click(fn=chat, inputs=[prompt, api_key], outputs=output)

      gr.Markdown("---")
-     gr.Markdown("*This app uses the Hugging Face Inference API. Enter any hosted model key to run.*")

  if __name__ == "__main__":
-     demo.launch()
+ # app.py
  import os
+ import json
+ import requests
+ from typing import List, Optional
+
+ import gradio as gr

+ # Optional: huggingface_hub.InferenceApi if installed
+ try:
+     from huggingface_hub import InferenceApi
+     HF_HUB_AVAILABLE = True
+ except Exception:
+     HF_HUB_AVAILABLE = False

+ # Optional local generation support
+ try:
+     from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+     TRANSFORMERS_AVAILABLE = True
+ except Exception:
+     TRANSFORMERS_AVAILABLE = False

+ # ---------------------
+ # Config / Model list
+ # ---------------------
+ DEFAULT_MODEL = os.getenv("HUGGINGFACE_MODEL", "gpt2")

+ # A curated list of public models for quick selection (small->medium->instruction-tuned)
+ COMMON_MODELS = [
+     "gpt2",
+     "distilgpt2",
+     "google/flan-t5-small",
+     "google/flan-t5-base",
+     "google/flan-t5-large",
+     "google/flan-t5-xl",
+     "facebook/opt-1.3b",
+     "facebook/opt-2.7b",
+     "bigscience/bloom-560m",
+     "bigscience/bloomz-560m",
+     "tiiuae/falcon-7b-instruct",  # may be gated
+     "mistralai/Mixtral-8x7B-Instruct-v0.1",  # example gated/large
+     "stabilityai/stablelm-tuned-alpha-3b",
+     "EleutherAI/gpt-neo-2.7B",
+     "google/t5-v1_1-base",
+     "hf-internal-testing/tiny-random-gpt2"
+ ]
+
+ # ---------------------
+ # Helpers
+ # ---------------------
+ def normalize_hf_output(data) -> str:
+     """Normalize HF inference output (list/dict/string) to plain text."""
+     if data is None:
+         return ""
+     if isinstance(data, str):
+         return data.strip()
+     if isinstance(data, list) and len(data) > 0:
+         first = data[0]
+         if isinstance(first, dict):
+             for key in ("generated_text", "text", "content"):
+                 if key in first and isinstance(first[key], str):
+                     return first[key].strip()
+             # fallback: join string values
+             vals = [str(v) for v in first.values()]
+             return " ".join(vals).strip()
+         if all(isinstance(x, str) for x in data):
+             return "\n".join(data).strip()
          return str(data)
+     if isinstance(data, dict):
+         for key in ("generated_text", "text", "content"):
+             if key in data and isinstance(data[key], str):
+                 return data[key].strip()
+         return json.dumps(data)
+     return str(data)
+
+ def get_api_token(input_token: Optional[str]) -> Optional[str]:
+     """Prefer UI-provided token, then env vars, else None."""
+     if input_token and input_token.strip():
+         return input_token.strip()
+     return os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
+
+ # ---------------------
+ # Inference callers
+ # ---------------------
+ def call_hf_router(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
+     """
+     Call HF router endpoint which is more future-proof for some hosted models.
+     Returns a plain-text response or a helpful error message.
+     """
+     url = f"https://router.huggingface.co/hf-inference/{model}"
+     headers = {"Content-Type": "application/json"}
+     if token:
+         headers["Authorization"] = f"Bearer {token}"
+     payload = {
+         "inputs": prompt,
+         "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}
+     }
+     try:
+         resp = requests.post(url, headers=headers, json=payload, timeout=60)
+     except Exception as e:
+         return f"[Request error: {e}]"
+
+     if resp.status_code == 410:
+         return ("[Error 410: endpoint/gone. This model may not have a hosted inference endpoint or requires gated access. "
+                 "Try another model or check the model page for access requirements.]")
+     if resp.status_code == 404:
+         return "[Error 404: model not found. Check the model id or try a different model.]"
+     if resp.status_code == 401:
+         return "[Error 401: unauthorized. Your API key may be missing or lacking permissions.]"
+     if resp.status_code != 200:
+         # include limited info
+         try:
+             info = resp.json()
+         except Exception:
+             info = resp.text
+         return f"[HF error {resp.status_code}: {info}]"
+
+     try:
+         data = resp.json()
+     except Exception:
+         return resp.text
+     return normalize_hf_output(data)
+
+ def call_hf_inferenceapi(prompt: str, model: str, token: Optional[str], max_new_tokens: int = 256, temperature: float = 0.2) -> str:
+     """Use huggingface_hub.InferenceApi when available (wraps different behaviour)."""
+     if not HF_HUB_AVAILABLE:
+         return call_hf_router(prompt, model, token, max_new_tokens, temperature)
+     try:
+         api = InferenceApi(repo_id=model, token=token)
+         out = api(prompt, params={"max_new_tokens": max_new_tokens, "temperature": temperature})
+         return normalize_hf_output(out)
      except Exception as e:
+         # fallback to router
+         return call_hf_router(prompt, model, token, max_new_tokens, temperature)
+
+ # Local generation fallback
+ _local_gen = None
+ def init_local_gen(model_name: str):
+     global _local_gen
+     if not TRANSFORMERS_AVAILABLE:
+         return None
+     try:
+         # Try to initialize pipeline for the specific model
+         tokenizer = AutoTokenizer.from_pretrained(model_name)
+         model = AutoModelForCausalLM.from_pretrained(model_name)
+         _local_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
+         return _local_gen
+     except Exception:
+         try:
+             _local_gen = pipeline("text-generation", model=model_name)
+             return _local_gen
+         except Exception:
+             return None
+
+ def call_local(prompt: str, model_name: str):
+     gen = init_local_gen(model_name)
+     if gen is None:
+         return "[Local generation unavailable — install 'transformers' and ensure the model is available locally.]"
+     try:
+         out = gen(prompt, max_length=len(prompt.split()) + 150, do_sample=True, top_p=0.95, temperature=0.8, num_return_sequences=1)
+         if isinstance(out, list) and len(out) > 0:
+             first = out[0]
+             if isinstance(first, dict):
+                 for key in ("generated_text", "text"):
+                     if key in first and isinstance(first[key], str):
+                         return first[key].strip()
+                 return str(first)
+             if isinstance(first, str):
+                 return first
+         return str(out)
+     except Exception as e:
+         return f"[Local generation failed: {e}]"
+
+ # ---------------------
+ # Conversation prompt builder
+ # ---------------------
+ SYSTEM_PROMPT = (
+     "You are an expert computer technician and systems engineer. "
+     "You know practical details about personal computers, servers, operating systems, networking, "
+     "hardware troubleshooting, performance tuning, security best practices, software installation and debugging. "
+     "When a user asks a question, respond clearly and concisely in English. Provide step-by-step instructions when helpful, "
+     "explain risks and trade-offs, and include commands or code snippets if they are useful."
+ )
+
+ def build_prompt(system_prompt: str, history: List[List[str]]) -> str:
+     parts = [f"System: {system_prompt}", "Conversation:"]
+     for user_msg, assistant_msg in history:
+         parts.append(f"User: {user_msg}")
+         if assistant_msg:
+             parts.append(f"Assistant: {assistant_msg}")
+     parts.append("Assistant:")
+     return "\n".join(parts)
+
+ # ---------------------
+ # Gradio callbacks
+ # ---------------------
+ def respond(user_message: str, chat_history, mode: str, selected_model: str, custom_model: str, api_key_input: str, max_tokens: int):
+     if chat_history is None:
+         chat_history = []
+     chat_history.append([user_message, None])
+
+     model_to_use = custom_model.strip() if custom_model and custom_model.strip() else selected_model
+     token = get_api_token(api_key_input)
+
+     prompt = build_prompt(SYSTEM_PROMPT, chat_history)
+
+     # Choose inference path
+     if mode == "HuggingFace (remote)":
+         # prefer huggingface_hub wrapper if available, fallback to router
+         if HF_HUB_AVAILABLE:
+             reply = call_hf_inferenceapi(prompt, model_to_use, token, max_new_tokens=max_tokens)
+         else:
+             reply = call_hf_router(prompt, model_to_use, token, max_new_tokens=max_tokens)
+     else:
+         reply = call_local(prompt, model_to_use)
+
+     # Ensure string and safe value
+     if reply is None:
+         reply = ""
+     reply = str(reply)
+
+     chat_history[-1][1] = reply
+     return chat_history, ""
+
+ def clear_history():
+     return []
+
+ # ---------------------
+ # Gradio UI
+ # ---------------------
+ with gr.Blocks(title="AI Computer Expert (multi-model)") as demo:
+     gr.Markdown("# AI Computer Expert — Multi-model (Hugging Face)")
+     gr.Markdown("Ask anything about computers. Choose a model from the list or type a custom model id. Enter a HF API key (optional) to use remote inference.")

+     with gr.Row():
+         with gr.Column(scale=3):
+             chatbot = gr.Chatbot(label="AI Computer Expert")
+             user_input = gr.Textbox(placeholder="Type your question here (e.g. 'Why is my laptop overheating?')", show_label=False, lines=2)
+             with gr.Row():
+                 send_btn = gr.Button("Send")
+                 clear_btn = gr.Button("Clear")
+         with gr.Column(scale=1):
+             mode = gr.Radio(choices=["HuggingFace (remote)", "Local (transformers)"], value="HuggingFace (remote)", label="Mode")
+             model_dropdown = gr.Dropdown(label="Select model", choices=COMMON_MODELS, value=DEFAULT_MODEL)
+             custom_model = gr.Textbox(label="Custom model id (optional)", placeholder="owner/model-name (takes precedence over dropdown)")
+             api_key_box = gr.Textbox(label="HuggingFace API Key (optional)", type="password", placeholder="hf_xxx ...")
+             max_tokens = gr.Slider(label="Max new tokens", minimum=32, maximum=1024, step=32, value=256)

+     gr.Markdown("**Notes:**\n- Some large/gated models require special access or are not hosted for inference. If you see 410/404, try a different model or set up an Inference Endpoint.\n- If you don't want to use remote API, switch to Local and ensure you have the model installed and `transformers` available.")

+     examples = [
+         "My Windows 10 laptop randomly restarts how do I diagnose this?",
+         "How can I speed up boot time on Ubuntu?",
+         "Explain how RAID 1 differs from RAID 5 and when to use each.",
+         "I get 'kernel panic' on boot, what logs should I check?"
+     ]
+     gr.Examples(examples=examples, inputs=user_input)

+     send_btn.click(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
+     user_input.submit(respond, inputs=[user_input, chatbot, mode, model_dropdown, custom_model, api_key_box, max_tokens], outputs=[chatbot, user_input])
+     clear_btn.click(lambda: [], None, chatbot)

      gr.Markdown("---")
+     gr.Markdown("*This app supports many HF models; some models may be gated or not available via hosted inference.*")

  if __name__ == "__main__":
+     # port can be set with PORT env var (useful for Spaces)
+     demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))
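
Reviewer note: a minimal sketch (not part of the commit) for sanity-checking the new normalize_hf_output helper without starting the UI. It assumes the file is importable as `app`; importing builds the Gradio blocks but does not launch them, since launch() is guarded by the __main__ check.

    # sketch: exercise normalize_hf_output with the payload shapes
    # the Inference API commonly returns
    from app import normalize_hf_output

    # list-of-dicts shape, as returned by text-generation endpoints
    assert normalize_hf_output([{"generated_text": " hello "}]) == "hello"
    # plain strings are stripped; dicts fall back to known keys
    assert normalize_hf_output("  raw text ") == "raw text"
    assert normalize_hf_output({"text": "answer"}) == "answer"
    # unknown dicts are JSON-encoded rather than dropped
    print(normalize_hf_output({"score": 0.9}))  # -> {"score": 0.9}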