sakuragolden committed (verified)
Commit 36907b9 · 1 Parent(s): db09d63

Update app.py

Files changed (1): app.py +171 -201
app.py CHANGED
@@ -1,187 +1,177 @@
  """
  app.py - All-Disciplines Knowledge Assistant (Gradio)
-
- Changes in this version (per user request):
- - Expanded the discipline list to include many more scientific, technical, and humanities fields.
- - Frontend exposes separate API key fields so you can provide an OpenAI API key or Hugging Face API token from the UI (no need to set env vars).
- - Supports three LLM backends selectable at runtime, plus an offline fallback:
-   * openai (uses the OpenAI Python SDK if installed and a key is provided in the UI)
-   * huggingface_inference (calls the Hugging Face Inference API using the provided HF token and model name)
-   * transformers_local (uses a local `transformers` pipeline if that package is installed and the chosen model is available locally or can be downloaded)
-   * offline (fallback with limited built-in knowledge)
- - Avoids import-time crashes: optional packages are imported lazily and guarded, so the app always starts even if openai/transformers/sympy are absent.
- - Still enforces English-only and refuses to generate quizzes/exam questions.
-
- Run (recommended):
-   pip install gradio requests
-   # optional for full features:
-   pip install openai transformers sympy PyPDF2 python-dotenv
-
-   python app.py
-
- UI notes:
- - Enter an OpenAI API key (if you want to use OpenAI). If blank, the openai backend won't work.
- - Enter a Hugging Face token (if you want to use the HF Inference API).
- - Choose a backend in the "Preferred backend" dropdown.
- - For HF Inference, enter a model name (e.g. "gpt2", "bigscience/bloomz", or another text-generation model hosted on the HF Hub).
- - For transformers_local, enter a local model name (it will be downloaded if not present).
  """

  import os
  import io
- import json
  import time
  import requests
  import importlib
- import gradio as gr
  from typing import List, Tuple

- # ----------------- Lazy capability checks -----------------
  def has_module(name: str) -> bool:
      return importlib.util.find_spec(name) is not None

- _HAS_OPENAI = has_module("openai")
  _HAS_TRANSFORMERS = has_module("transformers")
  _HAS_SYMPY = has_module("sympy")
  _HAS_PYPDF2 = has_module("PyPDF2")

  if _HAS_SYMPY:
      import sympy as sp  # type: ignore

- # ----------------- System-wide constants -----------------
- DEFAULT_OPENAI_MODEL = "gpt-4"
- SYSTEM_PROMPT = (
-     "You are an encyclopedic, English-only scientific knowledge assistant. "
-     "Always reply in clear, accurate English. Adjust depth and mathematical formality "
-     "to the user's selected audience level (High School, Undergraduate, Graduate, Expert). "
-     "Do NOT generate quizzes, exam questions, or practice problems. If requested, refuse politely and supply explanatory material instead. "
-     "When applicable, include short suggestions for further reading (textbooks, review papers, or authoritative websites)."
- )

- # ----------------- Utility functions -----------------
-
- def openai_available_for_key(key: str) -> bool:
-     return _HAS_OPENAI and bool(key and key.strip())


- # OpenAI generation (lazy import)
- def gen_with_openai(prompt: str, openai_api_key: str, model: str = DEFAULT_OPENAI_MODEL, temperature: float = 0.2, max_tokens: int = 800) -> str:
      try:
-         import openai  # type: ignore
-         openai.api_key = openai_api_key
-         messages = [{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": prompt}]
-         resp = openai.ChatCompletion.create(model=model, messages=messages, temperature=temperature, max_tokens=max_tokens)
-         return resp["choices"][0]["message"]["content"].strip()
      except Exception as e:
-         return f"[OpenAI error] {e}"


- # Hugging Face Inference API generation (HTTP)
- def gen_with_hf_inference(prompt: str, hf_token: str, model: str = "gpt2", max_new_tokens: int = 256, temperature: float = 0.2) -> str:
      if not hf_token:
          return "[HuggingFace error] No HF token provided."
      headers = {"Authorization": f"Bearer {hf_token}", "Content-Type": "application/json"}
      url = f"https://api-inference.huggingface.co/models/{model}"
-     payload = {
-         "inputs": prompt,
-         "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature},
-     }
      try:
-         r = requests.post(url, headers=headers, json=payload, timeout=60)
          r.raise_for_status()
          data = r.json()
-         # HF Inference may return a list or dict depending on model
          if isinstance(data, dict):
-             # some models return {'generated_text': '...'}
              if "generated_text" in data:
                  return data["generated_text"].strip()
-             # others return {'error': '...'}
              if "error" in data:
                  return f"[HuggingFace error] {data['error']}"
              return json.dumps(data)
          if isinstance(data, list) and len(data) > 0:
-             # common response shape: [{'generated_text': '...'}]
              first = data[0]
              if isinstance(first, dict) and "generated_text" in first:
                  return first["generated_text"].strip()
-             # some models return tokens or other structures
              return str(first)
          return str(data)
      except Exception as e:
          return f"[HuggingFace HTTP error] {e}"


- # Local transformers generation
- _TFM_PIPELINE = None
-
- def ensure_local_pipeline(local_model: str = "gpt2"):
-     global _TFM_PIPELINE
-     if _TFM_PIPELINE is not None and getattr(_TFM_PIPELINE, "model", None) is not None:
-         return _TFM_PIPELINE
-     if not _HAS_TRANSFORMERS:
-         raise ImportError("transformers package not installed")
-     from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM  # type: ignore
-     try:
-         tokenizer = AutoTokenizer.from_pretrained(local_model)
-         model = AutoModelForCausalLM.from_pretrained(local_model)
-         _TFM_PIPELINE = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
-     except Exception:
-         # fallback to simple pipeline which may download the model
-         _TFM_PIPELINE = pipeline("text-generation", model=local_model)
-     return _TFM_PIPELINE
-
-
- def gen_with_local_transformers(prompt: str, local_model: str = "gpt2", max_new_tokens: int = 256, temperature: float = 0.2) -> str:
      try:
-         pipe = ensure_local_pipeline(local_model=local_model)
-         out = pipe(prompt, max_new_tokens=max_new_tokens, do_sample=False)
-         text = out[0]["generated_text"]
-         if text.startswith(prompt):
-             text = text[len(prompt):].strip()
-         return text
      except Exception as e:
-         return f"[Local transformers error] {e}"


- # Offline limited-knowledge fallback
- _SIMPLE_KB = {
-     "what is gravity": "Gravity is a fundamental force that attracts masses toward each other. At everyday scales, Earth's gravity causes objects to fall and gives weight to physical objects. For more detail, see Newton's law of universal gravitation and Einstein's general relativity.",
-     "what is dna": "DNA is the molecule that encodes genetic information in living organisms. Basic resources: molecular biology textbooks and NCBI tutorials.",
- }


  def offline_answer(prompt: str) -> str:
      q = prompt.lower()
-     for k, v in _SIMPLE_KB.items():
          if k in q:
-             return v + " (Offline mode: install an LLM backend for richer answers.)"
      return (
-         "Offline mode: limited knowledge. To get detailed, up-to-date answers, configure a backend (OpenAI, Hugging Face Inference, or local transformers).\n"
-         "As a guideline: provide definitions, core principles, experimental evidence, and suggested readings.\n"
-         "Try asking 'What is gravity?' or 'Explain DNA structure.'"
      )


- # Choose generator
- def generate_answer(prompt: str, backend: str, openai_key: str, hf_token: str, hf_model: str, local_model: str, temperature: float = 0.2):
      backend = backend or "offline"
-     if backend == "openai":
-         if not openai_available_for_key(openai_key):
-             return "[OpenAI backend unavailable] Install the openai package and provide an API key in the UI."
-         return gen_with_openai(prompt, openai_api_key=openai_key, model=DEFAULT_OPENAI_MODEL, temperature=temperature)
-     if backend == "huggingface_inference":
-         return gen_with_hf_inference(prompt, hf_token=hf_token, model=hf_model, temperature=temperature)
      if backend == "transformers_local":
-         if not _HAS_TRANSFORMERS:
-             return "[Local transformers unavailable] Install the transformers package to use local models."
          return gen_with_local_transformers(prompt, local_model=local_model, temperature=temperature)
      return offline_answer(prompt)


- # ----------------- File/text extraction -----------------
-
  def extract_text_from_file_obj(file_obj) -> str:
      if file_obj is None:
          return ""
@@ -192,8 +182,7 @@ def extract_text_from_file_obj(file_obj) -> str:
          import PyPDF2  # type: ignore
          reader = PyPDF2.PdfReader(io.BytesIO(raw))
          pages = [p.extract_text() or "" for p in reader.pages]
-         return "\n".join(pages)
      except Exception:
          pass
      try:
@@ -204,8 +193,8 @@ def extract_text_from_file_obj(file_obj) -> str:
      return ""


- # ----------------- Math helper -----------------
- def math_solve_or_explain(expr: str, prefer_steps: bool = True, backend: str = "offline", openai_key: str = "", hf_token: str = "", local_model: str = "gpt2") -> str:
      if not expr:
          return "Error: empty expression."
      if _HAS_SYMPY:
@@ -214,87 +203,67 @@ def math_solve_or_explain(expr: str, prefer_steps: bool = True, backend: str = "
              lhs, rhs = expr.split("=", 1)
              eq = sp.Eq(sp.sympify(lhs), sp.sympify(rhs))
              sol = sp.solve(eq)
-             base = f"Analytic solution: {sol}\n"
          else:
              val = sp.simplify(sp.sympify(expr))
-             base = f"Simplified/symbolic result:\n{sp.pretty(val)}\n"
          if prefer_steps:
-             if backend in ("openai", "huggingface_inference", "transformers_local"):
-                 prompt = f"Provide step-by-step derivation for: {expr}\nInclude explanations for each step."
-                 return base + "\nStep-by-step:\n" + generate_answer(prompt, backend, openai_key, hf_token, local_model, local_model)
-             return base + "\n(Offline mode: detailed pedagogical steps unavailable.)"
          return base
      except Exception as e:
          # fallback to LLM
-         return "SymPy parse error: " + str(e) + "\nFallback to LLM...\n" + generate_answer(f"Derive/solve: {expr}", backend, openai_key, hf_token, local_model)
      # no sympy
-     return generate_answer(f"Derive/solve: {expr}", backend, openai_key, hf_token, local_model)


- # ----------------- Prompt builder (prevent quizzes) -----------------
  def build_science_prompt(question: str, discipline: str, audience: str, depth: str) -> str:
      prompt = (
-         f"Discipline: {discipline}\nAudience: {audience}\nDepth: {depth}\n\n"
-         f"Question: {question}\n\n"
-         "Please reply in clear English and include:\n"
-         "1) A short direct answer (2-4 sentences).\n"
-         "2) Underlying principles and reasoning (use LaTeX for equations if needed).\n"
-         "3) Experimental/observational evidence if applicable.\n"
-         "4) Real-world applications if applicable.\n"
-         "5) Current consensus and open questions.\n"
-         "6) Three suggested further reading items (textbooks, review papers, or authoritative websites).\n\n"
-         "IMPORTANT: DO NOT generate quizzes, exam questions, or practice problems. If the user requests them, refuse and provide explanatory content instead."
      )
      return prompt


- # ----------------- Gradio functions -----------------
  SCIENCE_DISCIPLINES = [
-     "Physics", "Chemistry", "Biology", "Mathematics", "Astronomy", "Earth Science", "Materials Science",
-     "Engineering", "Computer Science", "Neuroscience", "Ecology", "Paleontology", "Climate Science",
-     "Oceanography", "Nanoscience", "Cognitive Science", "Planetary Science", "Environmental Science",
-     "Systems Biology", "Biophysics", "Biomedical Engineering", "Robotics", "Geophysics", "Astrophysics",
-     "Chemical Engineering", "Civil Engineering", "Electrical Engineering", "Aerospace Engineering",
-     "Philosophy of Science", "History of Science", "Interdisciplinary"
  ]


  def chat_handler(user_message: str, history: List[Tuple[str, str]], discipline: str, audience: str, depth: str,
                   backend: str, openai_key: str, hf_token: str, hf_model: str, local_model: str, temperature: float):
      if user_message is None:
          return history, history

-     # refuse quiz/exam creation
      banned_terms = ["quiz", "exam", "test", "exercise", "practice problem", "problem set"]
      if any(t in user_message.lower() for t in banned_terms):
          reply = "I do not generate quizzes, exam questions, or practice problems. I can provide detailed explanations, derivations, and suggested readings."
          history = history + [(user_message, reply)]
          return history, history

-     # enforce English-only for convenience
      chinese_tokens = ["请", "出题", "练习题", "测验", "题目", "考试"]
      if any(t in user_message for t in chinese_tokens):
          reply = "Please ask your question in English. This assistant operates in English only."
@@ -313,35 +282,30 @@ def document_summarizer(file_obj, backend: str, openai_key: str, hf_token: str,
          return "Could not read the file or it appears empty."
      excerpt = text[:20000]
      prompt = (
-         f"You are a scholarly reader. Audience: {audience}. Based on the text below, provide:\n"
-         "1) A concise abstract (150-300 words).\n"
-         "2) Key methods and data sources.\n"
-         "3) Main conclusions and an assessment of confidence.\n"
-         "4) Limitations and suggestions for future work.\n"
-         "5) Suggested references or types of literature to check.\n\n"
-         f"Text begins:\n{excerpt}"
      )
      return generate_answer(prompt, backend, openai_key, hf_token, hf_model, local_model)


  def math_ui_handler(expr: str, prefer_steps: bool, backend: str, openai_key: str, hf_token: str, hf_model: str, local_model: str):
-     return math_solve_or_explain(expr, prefer_steps, backend=backend, openai_key=openai_key, hf_token=hf_token, local_model=local_model)


  # ----------------- Build Gradio UI -----------------
-
  def build_ui():
      with gr.Blocks(title="All-Disciplines Knowledge Assistant (English)") as demo:
          gr.Markdown("# 🌐 All-Disciplines Knowledge Assistant — English Only")
-         gr.Markdown("This assistant refuses to produce quizzes/exams. Enter API keys below to enable backends.")

          with gr.Row():
              with gr.Column(scale=3):
@@ -349,23 +313,20 @@ def build_ui():
                  audience = gr.Dropdown(label="Audience level", choices=["High School", "Undergraduate", "Graduate", "Expert"], value="Undergraduate")
                  depth = gr.Radio(label="Depth", choices=["overview", "detailed", "technical"], value="detailed")

-                 gr.Markdown("---\n**API keys / tokens (optional)**")
                  openai_key = gr.Textbox(label="OpenAI API Key (paste here)", type="password")
                  hf_token = gr.Textbox(label="Hugging Face API Token (paste here)", type="password")

-                 gr.Markdown("---\n**Backend selection**")
-                 backend = gr.Dropdown(label="Preferred backend", choices=["openai", "huggingface_inference", "transformers_local", "offline"], value=("openai" if _HAS_OPENAI else ("transformers_local" if _HAS_TRANSFORMERS else "offline")))

                  gr.Markdown("Model settings (for HF / local transformers)")
-                 hf_model = gr.Textbox(label="Hugging Face Inference model name (e.g. gpt2 or bigscience/bloom)", value="gpt2")
-                 local_model = gr.Textbox(label="Local transformers model name (for transformers_local)", value="gpt2")

                  temperature = gr.Slider(label="temperature", minimum=0.0, maximum=1.0, value=0.2, step=0.05)

-                 gr.Markdown("---\n**Conversation**")
                  chatbot = gr.Chatbot(label="Conversation")
                  state = gr.State([])
                  user_input = gr.Textbox(label="Enter your scientific question in English", lines=3)
@@ -395,13 +356,22 @@ def build_ui():
                      inputs=[expr, prefer_steps, backend, openai_key, hf_token, hf_model, local_model],
                      outputs=[math_out])

-         gr.Markdown("---\n**Disclaimer**: This assistant uses LLM backends that may produce incorrect or outdated information. For critical decisions, consult primary literature and domain experts.")
-
      return demo


  if __name__ == "__main__":
-     print("Optional packages detected: openai=", _HAS_OPENAI, "transformers=", _HAS_TRANSFORMERS, "sympy=", _HAS_SYMPY)
      app = build_ui()
      app.launch(server_name="0.0.0.0", share=False)

app.py (new version):

  """
  app.py - All-Disciplines Knowledge Assistant (Gradio)
+ - Default local transformers model: bigscience/bloomz-1b1
+ - On startup, if transformers is available, attempt to download/load the model and print status steps.
  """
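+ # Illustrative startup log when the default model loads cleanly (the exact
+ # messages are emitted by set_status() below):
+ #   [MODEL-STATUS] Checking availability of model 'bigscience/bloomz-1b1' in cache or HF hub...
+ #   [MODEL-STATUS] Downloading / loading tokenizer (this may take a while)...
+ #   [MODEL-STATUS] Downloading / loading model weights (this may take a while and use significant disk/memory)...
+ #   [MODEL-STATUS] Initializing text-generation pipeline...
+ #   [MODEL-STATUS] Model 'bigscience/bloomz-1b1' is ready and loaded into pipeline.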
 
  import os
  import io
  import time
+ import json
  import requests
  import importlib
+ import threading
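+ # NOTE: threading is imported but not used anywhere in this version; presumably reserved for background model loading.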
  from typing import List, Tuple

+ import gradio as gr
+
+ # ----------------- Configuration -----------------
+ DEFAULT_LOCAL_MODEL = "bigscience/bloomz-1b1"  # default stronger open-source model
+ DEFAULT_HF_MODEL = "gpt2"
+ DEFAULT_OPENAI_MODEL = "gpt-4"
+ SYSTEM_PROMPT = (
+     "You are an encyclopedic, English-only scientific knowledge assistant. "
+     "Reply in clear, accurate English and adapt depth to the user's audience level (High School, Undergraduate, Graduate, Expert). "
+     "Do NOT generate quizzes, exam questions, or practice problems. If asked, refuse politely and offer explanatory material. "
+     "When appropriate, include suggested further reading (textbooks, review articles, or authoritative websites)."
+ )
+
+ # ----------------- Capability detection (lazy) -----------------
  def has_module(name: str) -> bool:
      return importlib.util.find_spec(name) is not None

  _HAS_TRANSFORMERS = has_module("transformers")
  _HAS_SYMPY = has_module("sympy")
  _HAS_PYPDF2 = has_module("PyPDF2")
+ _HAS_OPENAI = has_module("openai")

  if _HAS_SYMPY:
      import sympy as sp  # type: ignore

+ # Global model/pipeline holder and status messages
+ _LOCAL_PIPELINE = None
+ _LOAD_STATUS = "Not started"  # updated during startup
+ _LOAD_ERROR = None


+ # ----------------- Utilities: model loading with status -----------------
+ def set_status(msg: str):
+     global _LOAD_STATUS
+     _LOAD_STATUS = msg
+     print(f"[MODEL-STATUS] {msg}", flush=True)


+ def load_local_transformers_model(local_model: str = DEFAULT_LOCAL_MODEL):
+     """
+     Synchronously attempt to load a local transformers model.
+     This prints & updates stage messages so the container logs clearly show progress.
+     """
+     global _LOCAL_PIPELINE, _LOAD_STATUS, _LOAD_ERROR
+     if not _HAS_TRANSFORMERS:
+         _LOAD_ERROR = "transformers package not installed; local model unavailable."
+         set_status(_LOAD_ERROR)
+         return None
      try:
+         set_status(f"Checking availability of model '{local_model}' in cache or HF hub...")
+         # lazy import to avoid import-time crash
+         from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM  # type: ignore
+
+         set_status("Downloading / loading tokenizer (this may take a while)...")
+         tokenizer = AutoTokenizer.from_pretrained(local_model, use_fast=True)
+
+         set_status("Downloading / loading model weights (this may take a while and use significant disk/memory)...")
+         # Try to reduce peak memory use; let transformers choose the device
+         try:
+             model = AutoModelForCausalLM.from_pretrained(local_model, low_cpu_mem_usage=True)
+         except TypeError:
+             # older transformers may not have low_cpu_mem_usage
+             model = AutoModelForCausalLM.from_pretrained(local_model)
+
+         set_status("Initializing text-generation pipeline...")
+         # create a text-generation pipeline; do_sample=False for deterministic output by default
+         _LOCAL_PIPELINE = pipeline("text-generation", model=model, tokenizer=tokenizer)
+         set_status(f"Model '{local_model}' is ready and loaded into pipeline.")
+         return _LOCAL_PIPELINE
      except Exception as e:
+         _LOAD_ERROR = f"Failed to load local model '{local_model}': {e}"
+         set_status(_LOAD_ERROR)
+         return None


+ # ----------------- Generators -----------------
+ def gen_with_local_transformers(prompt: str, local_model: str = DEFAULT_LOCAL_MODEL, max_new_tokens: int = 256, temperature: float = 0.2) -> str:
+     global _LOCAL_PIPELINE
+     if _LOCAL_PIPELINE is None:
+         # Try to load on demand (synchronous)
+         load_local_transformers_model(local_model)
+     if _LOCAL_PIPELINE is None:
+         return "[Local transformers unavailable] Model pipeline not ready."
+     try:
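+         # do_sample=False means greedy decoding, so the temperature argument is accepted but currently has no effect here.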
+         out = _LOCAL_PIPELINE(prompt, max_new_tokens=max_new_tokens, do_sample=False)
+         text = out[0].get("generated_text", "")
+         if text.startswith(prompt):
+             text = text[len(prompt):].strip()
+         return text
+     except Exception as e:
+         return f"[Local transformers generation error] {e}"


+ def gen_with_hf_inference(prompt: str, hf_token: str, model: str = DEFAULT_HF_MODEL, max_new_tokens: int = 256, temperature: float = 0.2) -> str:
      if not hf_token:
          return "[HuggingFace error] No HF token provided."
      headers = {"Authorization": f"Bearer {hf_token}", "Content-Type": "application/json"}
      url = f"https://api-inference.huggingface.co/models/{model}"
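+     # Standard HF Inference API text-generation payload: the raw prompt plus generation parameters.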
+     payload = {"inputs": prompt, "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}}
      try:
+         r = requests.post(url, headers=headers, json=payload, timeout=120)
          r.raise_for_status()
          data = r.json()
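+         # The Inference API may return a dict or a list of dicts depending on the model
+         # (e.g. {'generated_text': ...}, {'error': ...}, or [{'generated_text': ...}]).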
          if isinstance(data, dict):
              if "generated_text" in data:
                  return data["generated_text"].strip()
              if "error" in data:
                  return f"[HuggingFace error] {data['error']}"
              return json.dumps(data)
          if isinstance(data, list) and len(data) > 0:
              first = data[0]
              if isinstance(first, dict) and "generated_text" in first:
                  return first["generated_text"].strip()
              return str(first)
          return str(data)
      except Exception as e:
          return f"[HuggingFace HTTP error] {e}"


+ def gen_with_openai(prompt: str, openai_key: str, model: str = DEFAULT_OPENAI_MODEL, temperature: float = 0.2, max_tokens: int = 600) -> str:
+     if not _HAS_OPENAI:
+         return "[OpenAI error] openai package not installed."
      try:
+         import openai  # type: ignore
+         openai.api_key = openai_key
+         messages = [{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": prompt}]
+         resp = openai.ChatCompletion.create(model=model, messages=messages, temperature=temperature, max_tokens=max_tokens)
+         return resp["choices"][0]["message"]["content"].strip()
      except Exception as e:
+         return f"[OpenAI error] {e}"


  def offline_answer(prompt: str) -> str:
+     simple_kb = {
+         "what is gravity": "Gravity pulls masses toward each other. See Newton's law and Einstein's general relativity.",
+         "what is dna": "DNA encodes genetic information; see molecular biology textbooks and NCBI resources.",
+     }
      q = prompt.lower()
+     for k, v in simple_kb.items():
          if k in q:
+             return v + " (Offline mode; configure an LLM backend for richer answers.)"
      return (
+         "Offline mode: limited knowledge. To get detailed, up-to-date answers, configure a backend (OpenAI, Hugging Face Inference, or local transformers). "
+         "Example: ask 'What is gravity?' or 'Explain DNA structure.'"
      )


+ def generate_answer(prompt: str, backend: str, openai_key: str, hf_token: str, hf_model: str, local_model: str, temperature: float = 0.2) -> str:
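+     # Dispatch in priority order: transformers_local, huggingface_inference, openai; anything else falls back to offline.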
      backend = backend or "offline"
      if backend == "transformers_local":
          return gen_with_local_transformers(prompt, local_model=local_model, temperature=temperature)
+     if backend == "huggingface_inference":
+         return gen_with_hf_inference(prompt, hf_token=hf_token, model=hf_model, temperature=temperature)
+     if backend == "openai":
+         return gen_with_openai(prompt, openai_key=openai_key, model=DEFAULT_OPENAI_MODEL, temperature=temperature)
      return offline_answer(prompt)


+ # ----------------- file/text extraction -----------------
  def extract_text_from_file_obj(file_obj) -> str:
      if file_obj is None:
          return ""

@@ -192,8 +182,7 @@
          import PyPDF2  # type: ignore
          reader = PyPDF2.PdfReader(io.BytesIO(raw))
          pages = [p.extract_text() or "" for p in reader.pages]
+         return "\n".join(pages)
      except Exception:
          pass
      try:

@@ -204,8 +193,8 @@
      return ""


+ # ----------------- math helper -----------------
+ def math_solve_or_explain(expr: str, prefer_steps: bool = True, backend: str = "transformers_local", openai_key: str = "", hf_token: str = "", hf_model: str = DEFAULT_HF_MODEL, local_model: str = DEFAULT_LOCAL_MODEL) -> str:
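+     # If the expression contains "=", it is parsed as an equation and solved; otherwise it is simplified symbolically.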
      if not expr:
          return "Error: empty expression."
      if _HAS_SYMPY:

@@ -214,87 +203,67 @@
              lhs, rhs = expr.split("=", 1)
              eq = sp.Eq(sp.sympify(lhs), sp.sympify(rhs))
              sol = sp.solve(eq)
+             base = f"Analytic solution: {sol}\n"
          else:
              val = sp.simplify(sp.sympify(expr))
+             base = f"Simplified/symbolic result:\n{sp.pretty(val)}\n"
          if prefer_steps:
+             prompt = f"Provide a clear step-by-step derivation for: {expr}\nInclude explanations for each step."
+             return base + "\nStep-by-step:\n" + generate_answer(prompt, backend, openai_key, hf_token, hf_model, local_model)
          return base
      except Exception as e:
          # fallback to LLM
+         return f"SymPy parse error: {e}\nFallback to LLM...\n" + generate_answer(f"Derive/solve: {expr}", backend, openai_key, hf_token, hf_model, local_model)
      # no sympy
+     return generate_answer(f"Derive/solve: {expr}", backend, openai_key, hf_token, hf_model, local_model)


+ # ----------------- prompt builder -----------------
  def build_science_prompt(question: str, discipline: str, audience: str, depth: str) -> str:
      prompt = (
+         f"Discipline: {discipline}\nAudience: {audience}\nDepth: {depth}\n\n"
+         f"Question: {question}\n\n"
+         "Please reply in clear English and include:\n"
+         "1) A short direct answer (2-4 sentences).\n"
+         "2) Underlying principles and reasoning (use LaTeX for equations if needed).\n"
+         "3) Experimental/observational evidence if applicable.\n"
+         "4) Real-world applications if applicable.\n"
+         "5) Current consensus and open questions.\n"
+         "6) Three suggested further reading items (textbooks, review articles, or authoritative websites).\n\n"
+         "IMPORTANT: DO NOT generate quizzes, exam questions, or practice problems. If requested, refuse and provide explanatory content instead."
      )
      return prompt


+ # ----------------- disciplines (expanded) -----------------
  SCIENCE_DISCIPLINES = [
+     "Physics", "Condensed Matter Physics", "Particle Physics", "Quantum Physics", "Astrophysics",
+     "Chemistry", "Physical Chemistry", "Organic Chemistry", "Inorganic Chemistry", "Analytical Chemistry",
+     "Biology", "Molecular Biology", "Cell Biology", "Genetics", "Evolutionary Biology",
+     "Mathematics", "Applied Mathematics", "Statistics", "Probability", "Numerical Analysis",
+     "Earth Science", "Geology", "Geophysics", "Oceanography", "Atmospheric Science",
+     "Materials Science", "Nanoscience", "Biomaterials",
+     "Engineering", "Mechanical Engineering", "Electrical Engineering", "Civil Engineering", "Aerospace Engineering", "Chemical Engineering", "Biomedical Engineering", "Robotics",
+     "Computer Science", "AI/ML", "Theoretical CS", "Systems & Networking", "Human-Computer Interaction",
+     "Neuroscience", "Cognitive Science", "Psychology", "Behavioral Neuroscience",
+     "Ecology", "Environmental Science", "Climate Science", "Paleontology", "Planetary Science",
+     "Biophysics", "Systems Biology", "Biomedical Research", "Philosophy of Science", "History of Science",
+     "Interdisciplinary"
  ]


+ # ----------------- Gradio functions -----------------
  def chat_handler(user_message: str, history: List[Tuple[str, str]], discipline: str, audience: str, depth: str,
                   backend: str, openai_key: str, hf_token: str, hf_model: str, local_model: str, temperature: float):
      if user_message is None:
          return history, history

      banned_terms = ["quiz", "exam", "test", "exercise", "practice problem", "problem set"]
      if any(t in user_message.lower() for t in banned_terms):
          reply = "I do not generate quizzes, exam questions, or practice problems. I can provide detailed explanations, derivations, and suggested readings."
          history = history + [(user_message, reply)]
          return history, history

      chinese_tokens = ["请", "出题", "练习题", "测验", "题目", "考试"]
      if any(t in user_message for t in chinese_tokens):
          reply = "Please ask your question in English. This assistant operates in English only."

@@ -313,35 +282,30 @@
          return "Could not read the file or it appears empty."
      excerpt = text[:20000]
      prompt = (
+         f"You are a scholarly reader. Audience: {audience}. Based on the text below, provide:\n"
+         "1) A concise abstract (150-300 words).\n"
+         "2) Key methods and data sources.\n"
+         "3) Main conclusions and an assessment of confidence.\n"
+         "4) Limitations and suggestions for future work.\n"
+         "5) Suggested references or types of literature to check.\n\n"
+         f"Text begins:\n{excerpt}"
      )
      return generate_answer(prompt, backend, openai_key, hf_token, hf_model, local_model)


  def math_ui_handler(expr: str, prefer_steps: bool, backend: str, openai_key: str, hf_token: str, hf_model: str, local_model: str):
+     return math_solve_or_explain(expr, prefer_steps, backend=backend, openai_key=openai_key, hf_token=hf_token, hf_model=hf_model, local_model=local_model)


  # ----------------- Build Gradio UI -----------------
  def build_ui():
      with gr.Blocks(title="All-Disciplines Knowledge Assistant (English)") as demo:
          gr.Markdown("# 🌐 All-Disciplines Knowledge Assistant — English Only")
+         # show the model load status in the UI
+         gr.Markdown(f"**Local model (default):** `{DEFAULT_LOCAL_MODEL}`")
+         gr.Markdown(f"**Current load status:** `{_LOAD_STATUS}`")
+         gr.Markdown("This assistant refuses to create quizzes/exams. Provide API keys below to enable OpenAI or Hugging Face Inference. "
+                     "If you want to use a local model, ensure `transformers` and `torch` are installed and provide the local model name (default above).")

          with gr.Row():
              with gr.Column(scale=3):

@@ -349,23 +313,20 @@
                  audience = gr.Dropdown(label="Audience level", choices=["High School", "Undergraduate", "Graduate", "Expert"], value="Undergraduate")
                  depth = gr.Radio(label="Depth", choices=["overview", "detailed", "technical"], value="detailed")

+                 gr.Markdown("---\n**API keys / tokens (optional)**")
                  openai_key = gr.Textbox(label="OpenAI API Key (paste here)", type="password")
                  hf_token = gr.Textbox(label="Hugging Face API Token (paste here)", type="password")

+                 gr.Markdown("---\n**Backend selection**")
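+                 # Default to the local transformers backend when available, else HF Inference; offline remains selectable manually.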
+                 backend = gr.Dropdown(label="Preferred backend", choices=["transformers_local", "huggingface_inference", "openai", "offline"], value=("transformers_local" if _HAS_TRANSFORMERS else "huggingface_inference"))
 
                  gr.Markdown("Model settings (for HF / local transformers)")
+                 hf_model = gr.Textbox(label="Hugging Face Inference model name (e.g. gpt2 or bigscience/bloom)", value=DEFAULT_HF_MODEL)
+                 local_model = gr.Textbox(label="Local transformers model name (for transformers_local)", value=DEFAULT_LOCAL_MODEL)

                  temperature = gr.Slider(label="temperature", minimum=0.0, maximum=1.0, value=0.2, step=0.05)

+                 gr.Markdown("---\n**Conversation**")
                  chatbot = gr.Chatbot(label="Conversation")
                  state = gr.State([])
                  user_input = gr.Textbox(label="Enter your scientific question in English", lines=3)

@@ -395,13 +356,22 @@
                      inputs=[expr, prefer_steps, backend, openai_key, hf_token, hf_model, local_model],
                      outputs=[math_out])

+         gr.Markdown("---\n**Disclaimer**: This assistant may produce incorrect or outdated information. For critical decisions, consult primary literature and domain experts.")

      return demo


+ # ----------------- Main: load local model synchronously at startup (status shown) -----------------
  if __name__ == "__main__":
+     print("Starting All-Disciplines Knowledge Assistant...")
+     print("Optional packages detected: transformers=", _HAS_TRANSFORMERS, "sympy=", _HAS_SYMPY, "PyPDF2=", _HAS_PYPDF2)
+     # Attempt to load the default local model synchronously to show startup progress in logs
+     if _HAS_TRANSFORMERS:
+         print(f"Attempting to load default local model '{DEFAULT_LOCAL_MODEL}'. This may take time and download files. Check logs for progress.")
+         set_status("Startup: beginning local model load...")
+         load_local_transformers_model(DEFAULT_LOCAL_MODEL)
+     else:
+         set_status("transformers package not installed; local model unavailable (use HF Inference or OpenAI backends).")
+
+     # Start the Gradio app
      app = build_ui()
      app.launch(server_name="0.0.0.0", share=False)
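
+ # To run this version locally (a sketch; `torch` is implied by the UI note about local models,
+ # and `openai` is only needed for the OpenAI backend):
+ #   pip install gradio requests transformers torch sympy PyPDF2
+ #   python app.py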