Update app.py

app.py CHANGED
@@ -1,187 +1,177 @@
"""
app.py - All-Disciplines Knowledge Assistant (Gradio)
-
- - Expanded discipline list to include many more scientific, technical, and humanities fields.
- - Frontend exposes separate API Key fields so you can provide an OpenAI API key or a Hugging Face API token from the UI (no need to set env vars).
- - Supports three LLM backends selectable at runtime, plus an offline fallback:
-   * openai (uses the OpenAI Python SDK if installed and a key is provided in the UI)
-   * huggingface_inference (calls the Hugging Face Inference API using the provided HF token and model name)
-   * transformers_local (uses a local `transformers` pipeline if that package is installed and the chosen model is available locally or will be downloaded)
-   * offline (fallback with limited built-in knowledge)
- - Avoids import-time crashes: optional packages are imported lazily and guarded so the app always starts even if openai/transformers/sympy are absent.
- - Still enforces English-only and refuses to generate quizzes/exam questions.
-
- Run (recommended):
-     pip install gradio requests
-     # optional for full features:
-     pip install openai transformers sympy PyPDF2 python-dotenv
-
-     python app.py
-
- UI notes:
- - Enter an OpenAI API Key (if you want to use OpenAI). If blank, the openai backend won't work.
- - Enter a Hugging Face token (if you want to use the HF Inference API).
- - Choose a backend in the "Preferred backend" dropdown.
- - For HF Inference, enter a model name (e.g., "gpt2", "bigscience/bloomz", or another text-generation model hosted on the HF Hub).
- - For transformers_local, enter a local model name (it will attempt to download the model if not present).
"""
import os
import io
- import json
import time
import requests
import importlib
- import
from typing import List, Tuple


def has_module(name: str) -> bool:
    return importlib.util.find_spec(name) is not None

- _HAS_OPENAI = has_module("openai")
_HAS_TRANSFORMERS = has_module("transformers")
_HAS_SYMPY = has_module("sympy")
_HAS_PYPDF2 = has_module("PyPDF2")

if _HAS_SYMPY:
    import sympy as sp  # type: ignore

- #
-     "Always reply in clear, accurate English. Adjust depth and mathematical formality "
-     "to the user's selected audience level (High School, Undergraduate, Graduate, Expert). "
-     "Do NOT generate quizzes, exam questions, or practice problems. If requested, refuse politely and supply explanatory material instead. "
-     "When applicable, include short suggestions for further reading (textbooks, review papers, or authoritative websites)."
- )

- # -----------------

- def openai_available_for_key(key: str) -> bool:
-     return _HAS_OPENAI and bool(key and key.strip())

- # OpenAI generation (lazy import)
- def gen_with_openai(prompt: str, openai_api_key: str, model: str = DEFAULT_OPENAI_MODEL, temperature: float = 0.2, max_tokens: int = 800) -> str:
    try:
-
-
-
-
-
    except Exception as e:
-


-
- def gen_with_hf_inference(prompt: str, hf_token: str, model: str = "gpt2", max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    if not hf_token:
        return "[HuggingFace error] No HF token provided."
    headers = {"Authorization": f"Bearer {hf_token}", "Content-Type": "application/json"}
    url = f"https://api-inference.huggingface.co/models/{model}"
-     payload = {
-         "inputs": prompt,
-         "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature},
-     }
    try:
-         r = requests.post(url, headers=headers, json=payload, timeout=
        r.raise_for_status()
        data = r.json()
-         # HF Inference may return a list or dict depending on model
        if isinstance(data, dict):
-             # some models return {'generated_text': '...'}
            if "generated_text" in data:
                return data["generated_text"].strip()
-             # others return {'error': '...'}
            if "error" in data:
                return f"[HuggingFace error] {data['error']}"
            return json.dumps(data)
        if isinstance(data, list) and len(data) > 0:
-             # common response shape: [{'generated_text': '...'}]
            first = data[0]
            if isinstance(first, dict) and "generated_text" in first:
                return first["generated_text"].strip()
-             # some models return tokens or other structures
            return str(first)
        return str(data)
    except Exception as e:
        return f"[HuggingFace HTTP error] {e}"

- def ensure_local_pipeline(local_model: str = "gpt2"):
-     global _TFM_PIPELINE
-     if _TFM_PIPELINE is not None and getattr(_TFM_PIPELINE, "model", None) is not None:
-         return _TFM_PIPELINE
-     if not _HAS_TRANSFORMERS:
-         raise ImportError("transformers package not installed")
-     from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM  # type: ignore
-     try:
-         tokenizer = AutoTokenizer.from_pretrained(local_model)
-         model = AutoModelForCausalLM.from_pretrained(local_model)
-         _TFM_PIPELINE = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
-     except Exception:
-         # fallback to simple pipeline which may download model
-         _TFM_PIPELINE = pipeline("text-generation", model=local_model)
-     return _TFM_PIPELINE
-
-
- def gen_with_local_transformers(prompt: str, local_model: str = "gpt2", max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    try:
-
-
-
-
-
-         return text
    except Exception as e:
-         return f"[
-
-
- # Offline limited knowledge fallback
- _SIMPLE_KB = {
-     "what is gravity": "Gravity is a fundamental force that attracts masses toward each other. At everyday scales, Earth's gravity causes objects to fall and gives weight to physical objects. For more detail, see Newton's law of universal gravitation and Einstein's general relativity.",
-     "what is dna": "DNA is the molecule that encodes genetic information in living organisms. Basic resources: molecular biology textbooks and NCBI tutorials.",
- }

def offline_answer(prompt: str) -> str:
    q = prompt.lower()
-     for k, v in _SIMPLE_KB.items():
        if k in q:
-             return v + " (Offline mode
    return (
-         "Offline mode: limited knowledge. To get detailed up-to-date answers, configure a backend (OpenAI
-         "As a guideline: provide definitions, core principles, experimental evidence, and suggested readings.
-         "Try asking 'What is gravity?' or 'Explain DNA structure.'"
    )

- def generate_answer(prompt: str, backend: str, openai_key: str, hf_token: str, hf_model: str, local_model: str, temperature: float = 0.2):
    backend = backend or "offline"
-     if backend == "openai":
-         if not openai_available_for_key(openai_key):
-             return "[OpenAI backend unavailable] Install openai package and provide API key in the UI."
-         return gen_with_openai(prompt, openai_api_key=openai_key, model=DEFAULT_OPENAI_MODEL, temperature=temperature)
-     if backend == "huggingface_inference":
-         return gen_with_hf_inference(prompt, hf_token=hf_token, model=hf_model, temperature=temperature)
    if backend == "transformers_local":
-         if not _HAS_TRANSFORMERS:
-             return "[Local transformers unavailable] Install the transformers package to use local models."
        return gen_with_local_transformers(prompt, local_model=local_model, temperature=temperature)
    return offline_answer(prompt)


- # -----------------
-
def extract_text_from_file_obj(file_obj) -> str:
    if file_obj is None:
        return ""
@@ -192,8 +182,7 @@ def extract_text_from_file_obj(file_obj) -> str:
        import PyPDF2  # type: ignore
        reader = PyPDF2.PdfReader(io.BytesIO(raw))
        pages = [p.extract_text() or "" for p in reader.pages]
-         return "
- ".join(pages)
    except Exception:
        pass
    try:
@@ -204,8 +193,8 @@ def extract_text_from_file_obj(file_obj) -> str:
    return ""


- # -----------------
- def math_solve_or_explain(expr: str, prefer_steps: bool = True, backend: str = "
    if not expr:
        return "Error: empty expression."
    if _HAS_SYMPY:
@@ -214,87 +203,67 @@ def math_solve_or_explain(expr: str, prefer_steps: bool = True, backend: str = "
                lhs, rhs = expr.split("=", 1)
                eq = sp.Eq(sp.sympify(lhs), sp.sympify(rhs))
                sol = sp.solve(eq)
-                 base = f"Analytic solution: {sol}
-                 "
            else:
                val = sp.simplify(sp.sympify(expr))
-                 base = f"Simplified/symbolic result
-                 {sp.pretty(val)}
-                 "
            if prefer_steps:
-
-
-                 Include explanations for each step."
-                 return base + "
-                 Step-by-step:
-                 " + generate_answer(prompt, backend, openai_key, hf_token, local_model, local_model)
-                 return base + "
-                 (Offline mode: detailed pedagogical steps unavailable.)"
            return base
        except Exception as e:
            # fallback to LLM
-             return "SymPy parse error: " +
-             Fallback to LLM...
-             " + generate_answer(f"Derive/solve: {expr}", backend, openai_key, hf_token, local_model)
    # no sympy
-     return generate_answer(f"Derive/solve: {expr}", backend, openai_key, hf_token, local_model)

- # -----------------
def build_science_prompt(question: str, discipline: str, audience: str, depth: str) -> str:
    prompt = (
-         f"Discipline: {discipline}
-         "1) A short direct answer (2-4 sentences).
-         "2) Underlying principles and reasoning (use LaTeX for equations if needed).
-         "3) Experimental/observational evidence if applicable.
-         "4) Real-world applications if applicable.
-         "5) Current consensus and open questions.
-         "6) Three suggested further reading items (textbooks, review papers, or authoritative websites).
-         "IMPORTANT: DO NOT generate quizzes, exam questions, or practice problems. If the user requests them, refuse and provide explanatory content instead."
    )
    return prompt

- # -----------------
SCIENCE_DISCIPLINES = [
-     "Physics", "
]

def chat_handler(user_message: str, history: List[Tuple[str, str]], discipline: str, audience: str, depth: str,
                 backend: str, openai_key: str, hf_token: str, hf_model: str, local_model: str, temperature: float):
    if user_message is None:
        return history, history

-     # refuse quiz/exam creation
    banned_terms = ["quiz", "exam", "test", "exercise", "practice problem", "problem set"]
    if any(t in user_message.lower() for t in banned_terms):
        reply = "I do not generate quizzes, exam questions, or practice problems. I can provide detailed explanations, derivations, and suggested readings."
        history = history + [(user_message, reply)]
        return history, history

-     # enforce English-only for convenience
    chinese_tokens = ["请", "出题", "练习题", "测验", "题目", "考试"]
    if any(t in user_message for t in chinese_tokens):
        reply = "Please ask your question in English. This assistant operates in English only."
@@ -313,35 +282,30 @@ def document_summarizer(file_obj, backend: str, openai_key: str, hf_token: str,
        return "Could not read the file or it appears empty."
    excerpt = text[:20000]
    prompt = (
-         f"You are a scholarly reader. Audience: {audience}. Based on the text below, provide
-         "4) Limitations and suggestions for future work.
-         "5) Suggested references or types of literature to check.
-         f"Text begins:
-         {excerpt}"
    )
    return generate_answer(prompt, backend, openai_key, hf_token, hf_model, local_model)

def math_ui_handler(expr: str, prefer_steps: bool, backend: str, openai_key: str, hf_token: str, hf_model: str, local_model: str):
-     return math_solve_or_explain(expr, prefer_steps, backend=backend, openai_key=openai_key, hf_token=hf_token, local_model=local_model)


# ----------------- Build Gradio UI -----------------
-
def build_ui():
    with gr.Blocks(title="All-Disciplines Knowledge Assistant (English)") as demo:
        gr.Markdown("# 🌐 All-Disciplines Knowledge Assistant — English Only")
-

        with gr.Row():
            with gr.Column(scale=3):
@@ -349,23 +313,20 @@ def build_ui():
                audience = gr.Dropdown(label="Audience level", choices=["High School", "Undergraduate", "Graduate", "Expert"], value="Undergraduate")
                depth = gr.Radio(label="Depth", choices=["overview", "detailed", "technical"], value="detailed")

-                 gr.Markdown("
-                 **API keys / tokens (optional)**")
                openai_key = gr.Textbox(label="OpenAI API Key (paste here)", type="password")
                hf_token = gr.Textbox(label="Hugging Face API Token (paste here)", type="password")

-                 gr.Markdown("
-
-                 backend = gr.Dropdown(label="Preferred backend", choices=["openai", "huggingface_inference", "transformers_local", "offline"], value=("openai" if _HAS_OPENAI else ("transformers_local" if _HAS_TRANSFORMERS else "offline")))

                gr.Markdown("Model settings (for HF / local transformers)")
-                 hf_model = gr.Textbox(label="Hugging Face Inference model name (e.g. gpt2 or bigscience/bloom)", value=
-                 local_model = gr.Textbox(label="Local transformers model name (for transformers_local)", value=

                temperature = gr.Slider(label="temperature", minimum=0.0, maximum=1.0, value=0.2, step=0.05)

-                 gr.Markdown("
-                 **Conversation**")
                chatbot = gr.Chatbot(label="Conversation")
                state = gr.State([])
                user_input = gr.Textbox(label="Enter your scientific question in English", lines=3)
@@ -395,13 +356,22 @@ def build_ui():
                          inputs=[expr, prefer_steps, backend, openai_key, hf_token, hf_model, local_model],
                          outputs=[math_out])

-         gr.Markdown("
-         **Disclaimer**: This assistant uses LLM backends that may produce incorrect or outdated information. For critical decisions, consult primary literature and domain experts.")
-
    return demo


if __name__ == "__main__":
-     print("
    app = build_ui()
    app.launch(server_name="0.0.0.0", share=False)
"""
app.py - All-Disciplines Knowledge Assistant (Gradio)
+ - Default local transformers model: bigscience/bloomz-1b1
+ - On startup, if transformers is available, attempt to download/load the model and print status steps.
"""

import os
import io
import time
+ import json
import requests
import importlib
+ import threading
from typing import List, Tuple

+ import gradio as gr
+
+ # ----------------- Configuration -----------------
+ DEFAULT_LOCAL_MODEL = "bigscience/bloomz-1b1"  # default stronger open-source model
+ DEFAULT_HF_MODEL = "gpt2"
+ DEFAULT_OPENAI_MODEL = "gpt-4"
+ SYSTEM_PROMPT = (
+     "You are an encyclopedic, English-only scientific knowledge assistant. "
+     "Reply in clear, accurate English and adapt depth to the user's audience level (High School, Undergraduate, Graduate, Expert). "
+     "Do NOT generate quizzes, exam questions, or practice problems. If asked, refuse politely and offer explanatory material. "
+     "When appropriate, include suggested further reading (textbooks, review articles, or authoritative websites)."
+ )
+
+ # ----------------- Capability detection (lazy) -----------------
def has_module(name: str) -> bool:
    return importlib.util.find_spec(name) is not None

_HAS_TRANSFORMERS = has_module("transformers")
_HAS_SYMPY = has_module("sympy")
_HAS_PYPDF2 = has_module("PyPDF2")
+ _HAS_OPENAI = has_module("openai")

if _HAS_SYMPY:
    import sympy as sp  # type: ignore

+ # Global model/pipeline holder and status messages
+ _LOCAL_PIPELINE = None
+ _LOAD_STATUS = "Not started"  # updated during startup
+ _LOAD_ERROR = None

+ # ----------------- Utilities: model loading with status -----------------
+ def set_status(msg: str):
+     global _LOAD_STATUS
+     _LOAD_STATUS = msg
+     print(f"[MODEL-STATUS] {msg}", flush=True)


+ def load_local_transformers_model(local_model: str = DEFAULT_LOCAL_MODEL):
+     """
+     Synchronously attempt to load a local transformers model.
+     This prints & updates stage messages so the container logs clearly show progress.
+     """
+     global _LOCAL_PIPELINE, _LOAD_STATUS, _LOAD_ERROR
+     if not _HAS_TRANSFORMERS:
+         _LOAD_ERROR = "transformers package not installed; local model unavailable."
+         set_status(_LOAD_ERROR)
+         return None

    try:
+         set_status(f"Checking availability of model '{local_model}' in cache or HF hub...")
+         # lazy import to avoid import-time crash
+         from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM  # type: ignore
+
+         set_status("Downloading / loading tokenizer (this may take a while)...")
+         tokenizer = AutoTokenizer.from_pretrained(local_model, use_fast=True)
+
+         set_status("Downloading / loading model weights (this may take a while and use significant disk/memory)...")
+         # Try to reduce peak memory use; let transformers choose device
+         try:
+             model = AutoModelForCausalLM.from_pretrained(local_model, low_cpu_mem_usage=True)
+         except TypeError:
+             # older transformers may not have low_cpu_mem_usage
+             model = AutoModelForCausalLM.from_pretrained(local_model)
+
+         set_status("Initializing text-generation pipeline...")
+         # create a text-generation pipeline; do_sample=False for deterministic output by default
+         _LOCAL_PIPELINE = pipeline("text-generation", model=model, tokenizer=tokenizer)
+         set_status(f"Model '{local_model}' is ready and loaded into pipeline.")
+         return _LOCAL_PIPELINE
    except Exception as e:
+         _LOAD_ERROR = f"Failed to load local model '{local_model}': {e}"
+         set_status(_LOAD_ERROR)
+         return None

+ # ----------------- Generators -----------------
+ def gen_with_local_transformers(prompt: str, local_model: str = DEFAULT_LOCAL_MODEL, max_new_tokens: int = 256, temperature: float = 0.2) -> str:
+     global _LOCAL_PIPELINE
+     if _LOCAL_PIPELINE is None:
+         # Try to load on demand (synchronous)
+         load_local_transformers_model(local_model)
+     if _LOCAL_PIPELINE is None:
+         return "[Local transformers unavailable] Model pipeline not ready."
+     try:
+         out = _LOCAL_PIPELINE(prompt, max_new_tokens=max_new_tokens, do_sample=False)
+         text = out[0].get("generated_text", "")
+         if text.startswith(prompt):
+             text = text[len(prompt):].strip()
+         return text
+     except Exception as e:
+         return f"[Local transformers generation error] {e}"

+ def gen_with_hf_inference(prompt: str, hf_token: str, model: str = DEFAULT_HF_MODEL, max_new_tokens: int = 256, temperature: float = 0.2) -> str:
    if not hf_token:
        return "[HuggingFace error] No HF token provided."
    headers = {"Authorization": f"Bearer {hf_token}", "Content-Type": "application/json"}
    url = f"https://api-inference.huggingface.co/models/{model}"
+     payload = {"inputs": prompt, "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}}
    try:
+         r = requests.post(url, headers=headers, json=payload, timeout=120)
        r.raise_for_status()
        data = r.json()
        if isinstance(data, dict):
            if "generated_text" in data:
                return data["generated_text"].strip()
            if "error" in data:
                return f"[HuggingFace error] {data['error']}"
            return json.dumps(data)
        if isinstance(data, list) and len(data) > 0:
            first = data[0]
            if isinstance(first, dict) and "generated_text" in first:
                return first["generated_text"].strip()
            return str(first)
        return str(data)
    except Exception as e:
        return f"[HuggingFace HTTP error] {e}"

+ def gen_with_openai(prompt: str, openai_key: str, model: str = DEFAULT_OPENAI_MODEL, temperature: float = 0.2, max_tokens: int = 600) -> str:
+     if not _HAS_OPENAI:
+         return "[OpenAI error] openai package not installed."
    try:
+         import openai  # type: ignore
+         openai.api_key = openai_key
+         messages = [{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": prompt}]
+         resp = openai.ChatCompletion.create(model=model, messages=messages, temperature=temperature, max_tokens=max_tokens)
+         return resp["choices"][0]["message"]["content"].strip()
    except Exception as e:
+         return f"[OpenAI error] {e}"


def offline_answer(prompt: str) -> str:
+     simple_kb = {
+         "what is gravity": "Gravity pulls masses toward each other. See Newton's law and Einstein's general relativity.",
+         "what is dna": "DNA encodes genetic information; see molecular biology textbooks and NCBI resources.",
+     }
    q = prompt.lower()
+     for k, v in simple_kb.items():
        if k in q:
+             return v + " (Offline mode; configure an LLM backend for richer answers.)"
    return (
+         "Offline mode: limited knowledge. To get detailed, up-to-date answers, configure a backend (OpenAI, Hugging Face Inference, or local transformers). "
+         "Example: ask 'What is gravity?' or 'Explain DNA structure.'"
    )

+ def generate_answer(prompt: str, backend: str, openai_key: str, hf_token: str, hf_model: str, local_model: str, temperature: float = 0.2) -> str:
    backend = backend or "offline"
    if backend == "transformers_local":
        return gen_with_local_transformers(prompt, local_model=local_model, temperature=temperature)
+     if backend == "huggingface_inference":
+         return gen_with_hf_inference(prompt, hf_token=hf_token, model=hf_model, temperature=temperature)
+     if backend == "openai":
+         return gen_with_openai(prompt, openai_key=openai_key, model=DEFAULT_OPENAI_MODEL, temperature=temperature)
    return offline_answer(prompt)


+ # ----------------- file/text extraction -----------------
def extract_text_from_file_obj(file_obj) -> str:
    if file_obj is None:
        return ""

        import PyPDF2  # type: ignore
        reader = PyPDF2.PdfReader(io.BytesIO(raw))
        pages = [p.extract_text() or "" for p in reader.pages]
+         return "\n".join(pages)
    except Exception:
        pass
    try:

    return ""


+ # ----------------- math helper -----------------
+ def math_solve_or_explain(expr: str, prefer_steps: bool = True, backend: str = "transformers_local", openai_key: str = "", hf_token: str = "", hf_model: str = DEFAULT_HF_MODEL, local_model: str = DEFAULT_LOCAL_MODEL) -> str:
    if not expr:
        return "Error: empty expression."
    if _HAS_SYMPY:
                lhs, rhs = expr.split("=", 1)
                eq = sp.Eq(sp.sympify(lhs), sp.sympify(rhs))
                sol = sp.solve(eq)
+                 base = f"Analytic solution: {sol}\n"
            else:
                val = sp.simplify(sp.sympify(expr))
+                 base = f"Simplified/symbolic result:\n{sp.pretty(val)}\n"
            if prefer_steps:
+                 prompt = f"Provide a clear step-by-step derivation for: {expr}\nInclude explanations for each step."
+                 return base + "\nStep-by-step:\n" + generate_answer(prompt, backend, openai_key, hf_token, hf_model, local_model)
            return base
        except Exception as e:
            # fallback to LLM
+             return f"SymPy parse error: {e}\nFallback to LLM...\n" + generate_answer(f"Derive/solve: {expr}", backend, openai_key, hf_token, hf_model, local_model)
    # no sympy
+     return generate_answer(f"Derive/solve: {expr}", backend, openai_key, hf_token, hf_model, local_model)


+ # ----------------- prompt builder -----------------
def build_science_prompt(question: str, discipline: str, audience: str, depth: str) -> str:
    prompt = (
+         f"Discipline: {discipline}\nAudience: {audience}\nDepth: {depth}\n\n"
+         f"Question: {question}\n\n"
+         "Please reply in clear English and include:\n"
+         "1) A short direct answer (2-4 sentences).\n"
+         "2) Underlying principles and reasoning (use LaTeX for equations if needed).\n"
+         "3) Experimental/observational evidence if applicable.\n"
+         "4) Real-world applications if applicable.\n"
+         "5) Current consensus and open questions.\n"
+         "6) Three suggested further reading items (textbooks, review articles, or authoritative websites).\n\n"
+         "IMPORTANT: DO NOT generate quizzes, exam questions, or practice problems. If requested, refuse and provide explanatory content instead."
    )
    return prompt

+ # ----------------- disciplines (expanded) -----------------
SCIENCE_DISCIPLINES = [
+     "Physics", "Condensed Matter Physics", "Particle Physics", "Quantum Physics", "Astrophysics",
+     "Chemistry", "Physical Chemistry", "Organic Chemistry", "Inorganic Chemistry", "Analytical Chemistry",
+     "Biology", "Molecular Biology", "Cell Biology", "Genetics", "Evolutionary Biology",
+     "Mathematics", "Applied Mathematics", "Statistics", "Probability", "Numerical Analysis",
+     "Earth Science", "Geology", "Geophysics", "Oceanography", "Atmospheric Science",
+     "Materials Science", "Nanoscience", "Biomaterials",
+     "Engineering", "Mechanical Engineering", "Electrical Engineering", "Civil Engineering", "Aerospace Engineering", "Chemical Engineering", "Biomedical Engineering", "Robotics",
+     "Computer Science", "AI/ML", "Theoretical CS", "Systems & Networking", "Human-Computer Interaction",
+     "Neuroscience", "Cognitive Science", "Psychology", "Behavioral Neuroscience",
+     "Ecology", "Environmental Science", "Climate Science", "Paleontology", "Planetary Science",
+     "Biophysics", "Systems Biology", "Biomedical Research", "Philosophy of Science", "History of Science",
+     "Interdisciplinary"
]

+ # ----------------- Gradio functions -----------------
def chat_handler(user_message: str, history: List[Tuple[str, str]], discipline: str, audience: str, depth: str,
                 backend: str, openai_key: str, hf_token: str, hf_model: str, local_model: str, temperature: float):
    if user_message is None:
        return history, history

    banned_terms = ["quiz", "exam", "test", "exercise", "practice problem", "problem set"]
    if any(t in user_message.lower() for t in banned_terms):
        reply = "I do not generate quizzes, exam questions, or practice problems. I can provide detailed explanations, derivations, and suggested readings."
        history = history + [(user_message, reply)]
        return history, history

    chinese_tokens = ["请", "出题", "练习题", "测验", "题目", "考试"]
    if any(t in user_message for t in chinese_tokens):
        reply = "Please ask your question in English. This assistant operates in English only."

return "Could not read the file or it appears empty."
|
| 283 |
excerpt = text[:20000]
|
| 284 |
prompt = (
|
| 285 |
+
f"You are a scholarly reader. Audience: {audience}. Based on the text below, provide:\n"
|
| 286 |
+
"1) A concise abstract (150-300 words).\n"
|
| 287 |
+
"2) Key methods and data sources.\n"
|
| 288 |
+
"3) Main conclusions and an assessment of confidence.\n"
|
| 289 |
+
"4) Limitations and suggestions for future work.\n"
|
| 290 |
+
"5) Suggested references or types of literature to check.\n\n"
|
| 291 |
+
f"Text begins:\n{excerpt}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
)
|
| 293 |
return generate_answer(prompt, backend, openai_key, hf_token, hf_model, local_model)
|
| 294 |
|
| 295 |
|
| 296 |
def math_ui_handler(expr: str, prefer_steps: bool, backend: str, openai_key: str, hf_token: str, hf_model: str, local_model: str):
+     return math_solve_or_explain(expr, prefer_steps, backend=backend, openai_key=openai_key, hf_token=hf_token, hf_model=hf_model, local_model=local_model)


# ----------------- Build Gradio UI -----------------
def build_ui():
    with gr.Blocks(title="All-Disciplines Knowledge Assistant (English)") as demo:
        gr.Markdown("# 🌐 All-Disciplines Knowledge Assistant — English Only")
+         # show the model load status in the UI
+         gr.Markdown(f"**Local model (default):** `{DEFAULT_LOCAL_MODEL}`")
+         gr.Markdown(f"**Current load status:** `{_LOAD_STATUS}`")
+         gr.Markdown("This assistant refuses to create quizzes/exams. Provide API keys below to enable OpenAI or Hugging Face Inference. "
+                     "If you want to use a local model, ensure `transformers` and `torch` are installed and provide the local model name (default above).")

        with gr.Row():
            with gr.Column(scale=3):

                audience = gr.Dropdown(label="Audience level", choices=["High School", "Undergraduate", "Graduate", "Expert"], value="Undergraduate")
                depth = gr.Radio(label="Depth", choices=["overview", "detailed", "technical"], value="detailed")

+                 gr.Markdown("---\n**API keys / tokens (optional)**")
                openai_key = gr.Textbox(label="OpenAI API Key (paste here)", type="password")
                hf_token = gr.Textbox(label="Hugging Face API Token (paste here)", type="password")

+                 gr.Markdown("---\n**Backend selection**")
+                 backend = gr.Dropdown(label="Preferred backend", choices=["transformers_local", "huggingface_inference", "openai", "offline"], value=("transformers_local" if _HAS_TRANSFORMERS else ("huggingface_inference" if not _HAS_TRANSFORMERS else "offline")))

                gr.Markdown("Model settings (for HF / local transformers)")
+                 hf_model = gr.Textbox(label="Hugging Face Inference model name (e.g. gpt2 or bigscience/bloom)", value=DEFAULT_HF_MODEL)
+                 local_model = gr.Textbox(label="Local transformers model name (for transformers_local)", value=DEFAULT_LOCAL_MODEL)

                temperature = gr.Slider(label="temperature", minimum=0.0, maximum=1.0, value=0.2, step=0.05)

+                 gr.Markdown("---\n**Conversation**")
                chatbot = gr.Chatbot(label="Conversation")
                state = gr.State([])
                user_input = gr.Textbox(label="Enter your scientific question in English", lines=3)
                          inputs=[expr, prefer_steps, backend, openai_key, hf_token, hf_model, local_model],
                          outputs=[math_out])

+         gr.Markdown("---\n**Disclaimer**: This assistant may produce incorrect or outdated information. For critical decisions, consult primary literature and domain experts.")

    return demo


+ # ----------------- Main: load local model synchronously at startup (status shown) -----------------
if __name__ == "__main__":
+     print("Starting All-Disciplines Knowledge Assistant...")
+     print("Optional packages detected: transformers=", _HAS_TRANSFORMERS, "sympy=", _HAS_SYMPY, "PyPDF2=", _HAS_PYPDF2)
+     # Attempt to load the default local model synchronously to show startup progress in logs
+     if _HAS_TRANSFORMERS:
+         print(f"Attempting to load default local model '{DEFAULT_LOCAL_MODEL}'. This may take time and download files. Check logs for progress.")
+         set_status("Startup: beginning local model load...")
+         load_local_transformers_model(DEFAULT_LOCAL_MODEL)
+     else:
+         set_status("transformers package not installed; local model unavailable (use HF Inference or OpenAI backends).")
+
+     # Start Gradio app
    app = build_ui()
    app.launch(server_name="0.0.0.0", share=False)
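
As a quick sanity check outside the app (not part of this commit), the default local model can be exercised with the same transformers pipeline call the loader uses above; the prompt here is just illustrative:

    from transformers import pipeline

    pipe = pipeline("text-generation", model="bigscience/bloomz-1b1")
    print(pipe("What is gravity?", max_new_tokens=64, do_sample=False)[0]["generated_text"])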