#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AI ่‡ชๅ‹•ๅŒ–็ ”็ฉถ็ณป็ตฑ V7 โ”€ ๆญทๅฒ็ด€้Œ„ๅ…จ้ขๆ”น็”จ Gradio ๅŽŸ็”Ÿๅ…ƒไปถ
ไฟฎๆญฃ่ฆ้ปž๏ผš
ๆญทๅฒ้ ็งป้™ค JS fetch / gr.HTML ๆ–นๆกˆ๏ผŒๆ”น็”จ gr.Dataframe + gr.Dropdown + gr.File
ไธ‹่ผ‰้€้Ž Python callback ๅ›žๅ‚ณๆช”ๆกˆ่ทฏๅพ‘็ตฆ gr.File๏ผŒ็ฉฉๅฎšๅฏ้ 
FastAPI ่ทฏ็”ฑไฟ็•™๏ผˆๅ‚™็”จ๏ผ‰๏ผŒไฝ† UI ไธๅ†ไพ่ณดๅฎƒ
"""
# =============================================================================
# 0. ่‡ชๅ‹•ๅฎ‰่ฃ้žๆจ™ๆบ–ๅฅ—ไปถ
# =============================================================================
import subprocess, sys
def _pip(pkg: str):
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", pkg],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
_REQUIRED = ["gradio>=4.0.0", "gradio-client>=0.8.0", "ddgs",
"requests", "beautifulsoup4", "lxml",
"fastapi", "uvicorn[standard]", "python-multipart"]
# Distributions whose import name differs from the pip name; the naive
# s/-/_/ rewrite below would miss these and reinstall them on every start.
_IMPORT_NAMES = {"beautifulsoup4": "bs4", "python-multipart": "multipart"}
print("📦 檢查套件依賴...")
for _pkg in _REQUIRED:
_dist = _pkg.split(">=")[0].split("[")[0]
_name = _IMPORT_NAMES.get(_dist, _dist.replace("-", "_"))
try:
__import__(_name)
except ImportError:
print(f" 安裝 {_pkg}...")
_pip(_pkg)
print("✅ 套件就緒\n")
# =============================================================================
# 1-11. ็ ”็ฉถๅผ•ๆ“Ž๏ผˆๅŽŸๅฐไธๅ‹•๏ผ‰
# =============================================================================
import os, re, time, hashlib, logging, threading, math, json, zipfile, uuid, queue
from typing import List, Dict, Optional, Tuple, Set
from datetime import datetime
from urllib.parse import urlparse, unquote, parse_qs
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import Counter, defaultdict
import requests
from bs4 import BeautifulSoup
try:
from ddgs import DDGS
DDGS_AVAILABLE = True
except ImportError:
DDGS_AVAILABLE = False
try:
from gradio_client import Client
GRADIO_AVAILABLE = True
except ImportError:
GRADIO_AVAILABLE = False
# โ”€โ”€ ๆ—ฅ่ชŒ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
_LOG_QUEUE: "queue.Queue[str]" = queue.Queue(maxsize=2000)
_SESSION_SENTINEL = "\x00SESSION_START\x00"
class _QueueHandler(logging.Handler):
def emit(self, record):
msg = self.format(record)  # format once, reuse for the retry below
try:
_LOG_QUEUE.put_nowait(msg)
except queue.Full:
try: _LOG_QUEUE.get_nowait()  # drop the oldest line to make room
except queue.Empty: pass
try: _LOG_QUEUE.put_nowait(msg)
except queue.Full: pass  # lost a race with another producer; drop the line
_FMT = logging.Formatter('%(asctime)s %(levelname)s [%(funcName)s] %(message)s',
datefmt='%H:%M:%S')
_qh = _QueueHandler(); _qh.setFormatter(_FMT)
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s - [%(funcName)s] - %(message)s',
handlers=[logging.StreamHandler()])
logging.getLogger().addHandler(_qh)
logger = logging.getLogger(__name__)
# โ”€โ”€ Config โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
class Config:
MODEL_POOL = [
{"name": "Llama-3.1-8B", "url": "taide/Llama-3.1-TAIDE-LX-8B-Chat", "priority": 1, "max_tokens": 2048},
{"name": "Llama3-8B-Alpha", "url": "taide/Llama3-TAIDE-LX-8B-Chat-Alpha1", "priority": 2, "max_tokens": 2048},
{"name": "TAIDE-7B", "url": "taide/TAIDE-LX-7B-Chat", "priority": 3, "max_tokens": 2048},
{"name": "Gemma-3-12B", "url": "taide/Gemma-3-TAIDE-12b-Chat", "priority": 4, "max_tokens": 4096},
]
PREFERRED_LARGE_MODEL = "Gemma-3-12B"
MAX_CONCURRENT_LLM_CALLS = 2
MAX_CONCURRENT_FETCH = 8
MAX_CONCURRENT_SEARCH = 3
RETRY_ON_FAILURE = 2
ENABLE_ROUND_ROBIN = True
NUM_SUBTOPICS = 5
QUERIES_PER_SUBTOPIC = 3
NUM_RESEARCH_QUESTIONS = 8
PAGES_PER_QUERY = 3
MAX_SEARCH_ATTEMPTS = 12
MAX_RESULTS_PER_SEARCH = 25
MIN_BODY_LENGTH = 80
FETCH_RESERVE_RATIO = 3
MIN_FULL_CONTENT_LENGTH = 300
MIN_SNIPPET_FALLBACK_LEN = 200
SOURCE_QUALITY_THRESHOLD = 3
MAX_SOURCES_PER_DOMAIN = 3
MIN_CONTENT_LENGTH = 150
LLM_CALL_INTERVAL = 1.5
SUMMARY_LENGTH = 600
SUMMARY_LENGTH_FLEXIBILITY = 0.2
MAX_NEW_TOKENS = 1800
CONTEXT_MAX_LENGTH = 12000
CHUNK_SIZE = 1000
TOP_K_CHUNKS = 6
SECTION_TARGET_WORDS = 450
SECTION_MAX_TOKENS = 1200
ENABLE_DRAFT_CRITIQUE = False
ENABLE_FACT_VALIDATION = True
SEARCH_DELAY = 1.0
TIMEOUT_SECONDS = 25
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
DEDUP_SIMILARITY_THRESHOLD = 0.82
PLAN_TEMPERATURE = 0.9
SUMMARY_TEMPERATURE = 0.7
SCORE_TEMPERATURE = 0.3
REPORT_TEMPERATURE = 0.6
CRITIQUE_TEMPERATURE = 0.5
OUTPUT_DIR = "./sessions"
SAVE_INTERMEDIATE = True
@classmethod
def get_summary_word_range(cls):
lo = int(cls.SUMMARY_LENGTH * (1 - cls.SUMMARY_LENGTH_FLEXIBILITY))
hi = int(cls.SUMMARY_LENGTH * (1 + cls.SUMMARY_LENGTH_FLEXIBILITY))
return f"{lo}-{hi} ๅญ—"
@classmethod
def validate(cls):
return bool(cls.MODEL_POOL)
# โ”€โ”€ TokenTracker โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
class TokenTracker:
def __init__(self):
self.total_input = self.total_output = self.call_count = 0
self.model_stats: Dict[str, Dict] = {}
self.call_details: List[Dict] = []
self._lock = threading.Lock()
def record(self, model, inp, out, call_type, success=True):
itok = self._est(inp); otok = self._est(out) if success else 0
with self._lock:
self.total_input += itok; self.total_output += otok; self.call_count += 1
s = self.model_stats.setdefault(model, {"calls":0,"input":0,"output":0,"failures":0})
s["calls"] += 1; s["input"] += itok
if success: s["output"] += otok
else: s["failures"] += 1
self.call_details.append({"model":model,"type":call_type,"in":itok,"out":otok,"ok":success})
def _est(self, text):
if not text: return 0
zh = len(re.findall(r'[\u4e00-\u9fff]', text))
en = len(re.findall(r'[a-zA-Z]', text))
return max(1, int(zh/1.5 + en/4 + (len(text)-zh-en)/3))
def total(self): return self.total_input + self.total_output
def summary(self):
lines = [f"็ธฝๅ‘ผๅซ๏ผš{self.call_count} | ็ธฝ Token๏ผš{self.total():,}"]
for m, s in self.model_stats.items():
lines.append(f" โ€ข {m}: {s['input']+s['output']:,} Token (ๅ‘ผๅซ:{s['calls']} ๅคฑๆ•—:{s['failures']})")
return "\n".join(lines)
def detailed_report(self):
lines = ["=== Token ๆ˜Ž็ดฐ ==="]
for i, c in enumerate(self.call_details, 1):
lines.append(f"{i}. {'โœ…' if c['ok'] else 'โŒ'} [{c['model']}/{c['type']}] in:{c['in']:,} out:{c['out']:,}")
lines.append(f"\n็ธฝ่จˆ๏ผš{self.total():,} Token")
return "\n".join(lines)
# โ”€โ”€ CircuitBreaker & ApiSpec โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
import random as _random
class _CircuitBreaker:
THRESHOLD = 5; RESET_SECS = 90
def __init__(self, name):
self.name = name; self._lock = threading.Lock()
self._failures = 0; self._open_until = 0.0
@property
def is_open(self):
with self._lock:
if self._open_until == 0: return False
if time.time() >= self._open_until:
self._failures = 0; self._open_until = 0
logger.info(f"๐Ÿ”„ ็†”ๆ–ทๅ™จ้‡็ฝฎ๏ผš{self.name}"); return False
return True
def record_success(self):
with self._lock: self._failures = 0; self._open_until = 0
def record_failure(self):
with self._lock:
self._failures += 1
if self._failures >= self.THRESHOLD:
self._open_until = time.time() + self.RESET_SECS
logger.warning(f"โšก ็†”ๆ–ท๏ผš{self.name} ({self._failures}ๆฌก)")
def failure_count(self):
with self._lock: return self._failures
class _ApiSpec:
API_NAME_CANDIDATES = ["/chat", "/predict", "/run/predict", "/infer"]
PARAM_TEMPLATES = [
lambda m,t,x: dict(message=m, temperature=t, max_new_tokens=x),
lambda m,t,x: dict(input=m, temperature=t, max_new_tokens=x),
lambda m,t,x: dict(message=m),
lambda m,t,x: dict(prompt=m, temperature=t, max_tokens=x),
lambda m,t,x: (m,),
]
def __init__(self, name):
self.model_name = name; self._lock = threading.Lock()
self._discovered = False; self.api_name = None
self.param_fn = None; self.use_positional = False
def mark_discovered(self, api_name, param_fn, use_positional=False):
with self._lock:
self.api_name = api_name; self.param_fn = param_fn
self.use_positional = use_positional; self._discovered = True
logger.info(f"โœ… API ๆŽข็ดข๏ผš{self.model_name} โ†’ {api_name}")
@property
def ready(self):
with self._lock: return self._discovered
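# ── Illustrative sketch (hypothetical helper, never called by the app):
# after THRESHOLD consecutive failures the breaker opens and is_open stays
# True until RESET_SECS elapse; a single recorded success closes it at once.
def _circuit_breaker_demo():
    cb = _CircuitBreaker("demo")
    for _ in range(_CircuitBreaker.THRESHOLD):
        cb.record_failure()
    assert cb.is_open          # open: this model is deprioritized
    cb.record_success()        # normally recorded after a successful call
    assert not cb.is_open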
# โ”€โ”€ MultiModelClient โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
class MultiModelClient:
def __init__(self, model_configs, tracker):
self.tracker = tracker
self._configs = {m["name"]: m for m in model_configs}
self._priority = {m["name"]: m["priority"] for m in model_configs}
self._max_tokens = {m["name"]: m["max_tokens"] for m in model_configs}
self._breakers = {m["name"]: _CircuitBreaker(m["name"]) for m in model_configs}
self._specs = {m["name"]: _ApiSpec(m["name"]) for m in model_configs}
self._local = threading.local()
self._rr_counter = 0; self._rr_lock = threading.Lock()
logger.info(f"โœ… MultiModelClient ๅˆๅง‹ๅŒ–๏ผš{len(self._configs)} ๅ€‹ๆจกๅž‹")
def _thread_client(self, name, force_new=False):
if not hasattr(self._local, "clients"): self._local.clients = {}
if force_new or name not in self._local.clients:
cfg = self._configs.get(name)
if not cfg: return None
try:
c = Client(cfg["url"]); self._local.clients[name] = c; return c
except Exception as e:
logger.warning(f"โš ๏ธ {name} Client ๅปบ็ซ‹ๅคฑๆ•—๏ผš{e}")
self._local.clients.pop(name, None); return None
return self._local.clients[name]
def _discover_api(self, name, client):
spec = self._specs[name]
if spec.ready: return True
probe = "ไฝ ๅฅฝ"; discovered = []
try:
info = client.view_api(print_info=False, return_format="dict")
# view_api returns {"named_endpoints": {...}, "unnamed_endpoints": {...}};
# the endpoint names live under "named_endpoints", not at the top level.
discovered = list((info or {}).get("named_endpoints", {}).keys())
except Exception: pass
candidates = list(dict.fromkeys(discovered + _ApiSpec.API_NAME_CANDIDATES))
for api_name in candidates:
for i, pfn in enumerate(_ApiSpec.PARAM_TEMPLATES):
try:
params = pfn(probe, 0.5, 10)
r = client.predict(*params, api_name=api_name) if isinstance(params, tuple) \
else client.predict(**params, api_name=api_name)
if r is not None:
spec.mark_discovered(api_name, pfn, isinstance(params, tuple)); return True
except Exception as e:
if "Cannot find" in str(e) or "not found" in str(e).lower(): break
return False
def _call_once(self, name, msg, temp, max_tok, force_new=False):
client = self._thread_client(name, force_new)
if not client: return False, "", "connection"
spec = self._specs[name]
if not spec.ready:
if not self._discover_api(name, client): return False, "", "api"
try:
params = spec.param_fn(msg, temp, max_tok)
r = client.predict(*params, api_name=spec.api_name) if spec.use_positional or isinstance(params, tuple) \
else client.predict(**params, api_name=spec.api_name)
result = str(r).strip() if r is not None else ""
if not result: return False, "", "empty"
return True, result, ""
except Exception as e:
err = str(e)
conn = any(k.lower() in err.lower() for k in ["Connection","WebSocket","timeout","SSL","BrokenPipe"])
if conn: self._local.clients.pop(name, None)
return False, "", "connection" if conn else "api"
def _call_model(self, name, msg, temp, max_tok):
for attempt in range(3):
ok, result, cat = self._call_once(name, msg, temp, max_tok, attempt > 0)
if ok: self._breakers[name].record_success(); return True, result
if cat == "api" and attempt == 0: self._specs[name] = _ApiSpec(name)
if attempt < 2: time.sleep((2**attempt) + _random.uniform(0, 1))
self._breakers[name].record_failure(); return False, ""
def _model_order(self, preferred=None):
all_names = sorted(self._priority, key=lambda x: self._priority[x])
healthy = [n for n in all_names if not self._breakers[n].is_open]
unhealthy = [n for n in all_names if self._breakers[n].is_open]
if not healthy:
healthy = sorted(all_names, key=lambda n: self._breakers[n].failure_count())
if preferred and preferred in healthy:
order = [preferred] + [n for n in healthy if n != preferred]
elif Config.ENABLE_ROUND_ROBIN and len(healthy) > 1:
with self._rr_lock:
idx = self._rr_counter % len(healthy); self._rr_counter += 1
order = [healthy[idx]] + [n for n in healthy if n != healthy[idx]]
else:
order = healthy
return order + unhealthy
def chat(self, message, temperature=0.8, max_tokens=None, call_type="general", preferred=None):
# Pacing note: sleeping while holding _rr_lock serializes concurrent callers,
# which is what enforces the global LLM_CALL_INTERVAL between calls.
with self._rr_lock:
now = time.time()
if not hasattr(self, '_last_call_time'): self._last_call_time = 0.0
elapsed = now - self._last_call_time
if elapsed < Config.LLM_CALL_INTERVAL: time.sleep(Config.LLM_CALL_INTERVAL - elapsed)
self._last_call_time = time.time()
for name in self._model_order(preferred):
if self._breakers[name].is_open: continue
mt = max_tokens or self._max_tokens.get(name, Config.MAX_NEW_TOKENS)
ok, result = self._call_model(name, message, temperature, mt)
self.tracker.record(name, message, result if ok else "", call_type, ok)
if ok and result: return result
logger.error(f"โŒ ๆ‰€ๆœ‰ๆจกๅž‹ๅคฑๆ•— call_type={call_type}")
return "[้Œฏ่ชค] ็„กๆณ•ๅ–ๅพ—ๅ›žๆ‡‰"
def health_report(self):
lines = ["=== ๆจกๅž‹ๅฅๅบท็‹€ๆ…‹ ==="]
for name in sorted(self._priority, key=lambda x: self._priority[x]):
cb = self._breakers[name]; sp = self._specs[name]
status = "๐Ÿ”ด ็†”ๆ–ทไธญ" if cb.is_open else "๐ŸŸข ๆญฃๅธธ"
api = sp.api_name if sp.ready else "๏ผˆๆœชๆŽข็ดข๏ผ‰"
lines.append(f" {status} {name}๏ฝœ{api}๏ฝœๅคฑๆ•—:{cb.failure_count()}")
return "\n".join(lines)
# โ”€โ”€ ContentDeduplicator โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
class ContentDeduplicator:
IGNORED_PARAMS = {'utm_source','utm_medium','ref','fbclid','gclid','si'}
def __init__(self):
self.seen_fps: Set[str] = set()
self.seen_urls: Set[str] = set()
self.domain_counts: Dict[str, int] = defaultdict(int)
self.filtered = 0; self._lock = threading.Lock()
def _norm_url(self, url):
try:
p = urlparse(url); domain = re.sub(r'^[a-z]{2}\.', '', p.netloc.lower())
path = unquote(p.path.rstrip('/'))
if p.query:
q = {k:v for part in p.query.split('&') if '=' in part
for k,v in [part.split('=',1)] if k not in self.IGNORED_PARAMS}
qs = '&'.join(f"{k}={v}" for k,v in sorted(q.items()))
else: qs = ""
return f"{domain}{path}?{qs}" if qs else f"{domain}{path}"
except Exception: return url.lower().strip()
def _fp(self, url, title, content):
ch = hashlib.md5(re.sub(r'\s+', ' ', content[:300]).lower().encode()).hexdigest()[:10]
return f"{self._norm_url(url)}|{re.sub(r'[^\\w\\u4e00-\\u9fff]+','',title.lower())}|{ch}"
def _sim(self, f1, f2):
p1, p2 = f1.split('|'), f2.split('|')
if len(p1) != 3 or len(p2) != 3: return 0.0
u1,t1,c1 = p1; u2,t2,c2 = p2
if u1 == u2: return 1.0
if t1 == t2 and c1 == c2: return 0.95
if c1 == c2: return 0.75
return 0.0
def get_domain(self, url):
try: return urlparse(url).netloc.lower()
except Exception: return url
def is_duplicate(self, url, title, content):
with self._lock:
norm = url.lower().strip()
if norm in self.seen_urls: return True, "URL้‡่ค‡"
domain = self.get_domain(url)
if self.domain_counts[domain] >= Config.MAX_SOURCES_PER_DOMAIN: return True, "ๅŸŸๅ่ถ…้ก"
new_fp = self._fp(url, title, content)
for fp in self.seen_fps:
if self._sim(new_fp, fp) >= Config.DEDUP_SIMILARITY_THRESHOLD: return True, "ๅ…งๅฎน้‡่ค‡"
if 'wikipedia.org' in url.lower():
m = re.search(r'wikipedia\.org/wiki/([^/#?]+)', url)
if m:
art = m.group(1).lower()
for su in self.seen_urls:
sm = re.search(r'wikipedia\.org/wiki/([^/#?]+)', su)
if sm and sm.group(1).lower() == art: return True, "็ถญๅŸบ่ทจ่ชž่จ€"
self.seen_fps.add(new_fp); self.seen_urls.add(norm)
self.domain_counts[domain] += 1
return False, ""
# โ”€โ”€ SmartContextBuilder โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
class SmartContextBuilder:
def __init__(self, chunk_size=Config.CHUNK_SIZE):
self.chunk_size = chunk_size
def _tokenize(self, text):
zh = list(re.findall(r'[\u4e00-\u9fff]', text))
en = re.findall(r'[a-zA-Z]{2,}', text.lower())
bg = [zh[i]+zh[i+1] for i in range(len(zh)-1)]
return zh + bg + en
def _tf(self, toks):
cnt = Counter(toks); total = max(len(toks), 1)
return {t: c/total for t, c in cnt.items()}
def _idf(self, chunks_tokens, all_terms):
N = len(chunks_tokens)
return {t: math.log((N+1)/(sum(1 for ts in chunks_tokens if t in ts)+1))+1 for t in all_terms}
def _chunk(self, text):
sentences = re.split(r'[ใ€‚๏ผ๏ผŸ\n]', text)
chunks, cur = [], ""
for s in sentences:
s = s.strip()
if not s: continue
if len(cur)+len(s) > self.chunk_size and cur: chunks.append(cur.strip()); cur = s
else: cur += s + "ใ€‚"
if cur.strip(): chunks.append(cur.strip())
return chunks
def build_ranked_context(self, sources, query, top_k=Config.TOP_K_CHUNKS):
query_tokens = set(self._tokenize(query))
all_chunks: List[Tuple[str,str,List[str]]] = []
for src in sources:
full = src.get("full_content") or src.get("content", "")
label = f"[{src.get('title','')[:30]}]"
for chunk in self._chunk(full):
toks = self._tokenize(chunk)
all_chunks.append((chunk, label, toks))
if not all_chunks: return ""
all_terms: Set[str] = set()
for _,_,toks in all_chunks: all_terms.update(toks)
idf = self._idf([t for _,_,t in all_chunks], all_terms)
scored = []
for text, label, toks in all_chunks:
if len(text) < 50: continue
tf = self._tf(toks)
score = sum(tf.get(t,0)*idf.get(t,0) for t in query_tokens)
scored.append((score, text, label))
scored.sort(key=lambda x: -x[0])
parts = [f"--- ็‰‡ๆฎต{i} {label} ---\n{text}" for i,(sc,text,label) in enumerate(scored[:top_k],1)]
return "\n\n".join(parts)[:Config.CONTEXT_MAX_LENGTH]
# โ”€โ”€ SourceQualityScorer โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
class SourceQualityScorer:
TRUSTED = ['wikipedia.org','britannica.com','reuters.com','bbc.com','nytimes.com',
'myanimelist.net','anilist.co','animenewsnetwork.com','crunchyroll.com']
SPAM = ['click here','buy now','free download','็ซ‹ๅณ่ณผ่ฒท','ๅ…่ฒปไธ‹่ผ‰','casino','ๅšๅผˆ']
NAV = ['search results','page not found','404','ๆœๅฐ‹็ตๆžœ','้ ้ขไธๅญ˜ๅœจ','login required']
def score_source(self, source, topic):
score = 5
content = (source.get("full_content") or source.get("content","")).strip()
title = source.get("title","").strip()
url = source.get("url","").lower()
cl = len(content)
if cl >= 3000: score += 2
elif cl >= 1000: score += 1
elif cl < 150: score -= 2
tt = set(re.findall(r'[\w\u4e00-\u9fff]{2,}', topic.lower()))
tit = set(re.findall(r'[\w\u4e00-\u9fff]{2,}', title.lower()))
score += min(len(tt & tit), 2)
ct = set(re.findall(r'[\w\u4e00-\u9fff]{2,}', content.lower()))
if len(tt & ct) >= len(tt)*0.8: score += 1
elif len(tt & ct) == 0: score -= 1
if any(d in url for d in self.TRUSTED): score += 1
if any(s in content.lower()[:500] for s in self.SPAM): score -= 3
if any(s in content.lower()[:200] for s in self.NAV): score -= 2
sents = [s for s in re.split(r'[ใ€‚๏ผ๏ผŸ.!?\n]', content) if len(s.strip()) > 20]
if len(sents) >= 5: score += 1
return max(0, min(10, score))
def batch_score(self, sources, topic):
scored = [(s, self.score_source(s, topic)) for s in sources]
high = sum(1 for _,sc in scored if sc >= Config.SOURCE_QUALITY_THRESHOLD)
logger.info(f" ๐Ÿ“Š ่ฉ•ๅˆ†ๅฎŒๆˆ๏ผš{len(scored)} ็ญ†๏ผŒ{high} ็ญ†้€š้Ž้–พๅ€ผ")
return scored
# โ”€โ”€ AggressiveSearcher โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
class AggressiveSearcher:
def __init__(self, dedup):
self.dedup = dedup; self.ddgs = None
if DDGS_AVAILABLE:
try: self.ddgs = DDGS(); logger.info("โœ… DDGS ๅˆๅง‹ๅŒ–")
except Exception as e: logger.warning(f"โš ๏ธ DDGS๏ผš{e}")
def _ddgs_search(self, query, n):
if not self.ddgs: return []
for backend in ["html", "lite"]:
try:
raw = list(self.ddgs.text(query=query, region="us-en", safesearch="off",
backend=backend, max_results=n))
out = []
for r in raw:
url = r.get("href") or r.get("link") or r.get("url","")
title = r.get("title") or r.get("headline","็„กๆจ™้กŒ")
body = r.get("body") or r.get("snippet") or r.get("description","")
if not url or len(body) < Config.MIN_BODY_LENGTH: continue
if any(x in url.lower() for x in ['bing.com/search','duckduckgo.com/html']): continue
out.append({"url":url,"title":title,"content":body[:3000],
"timestamp":datetime.now().isoformat()})
if out: return out
except Exception as e: logger.warning(f"[{backend}] {str(e)[:80]}")
return []
def _fallback_search(self, query, n):
try:
r = requests.post("https://html.duckduckgo.com/html/",
data={"q":query,"kl":"us-en"},
headers={"User-Agent":Config.USER_AGENT},
timeout=Config.TIMEOUT_SECONDS, verify=True)
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser"); out = []
for item in soup.find_all("div", class_="result")[:n]:
ta = item.find("a", class_="result__a"); sa = item.find("a", class_="result__snippet")
if not ta: continue
title = ta.get_text(strip=True); href = ta.get("href","")
if href.startswith("/l/?"):
params = parse_qs(urlparse(href).query); url = params.get("uddg",[None])[0]
if not url: continue
else: url = href
body = sa.get_text(strip=True) if sa else ""
if not url or len(body) < Config.MIN_BODY_LENGTH: continue
out.append({"url":url,"title":title,"content":body[:3000],
"timestamp":datetime.now().isoformat()})
return out
except Exception as e: logger.error(f"[ๅ‚™็”จๆœๅฐ‹] {e}"); return []
def fetch_full_content(self, url):
try:
r = requests.get(url, headers={"User-Agent":Config.USER_AGENT},
timeout=Config.TIMEOUT_SECONDS, verify=True)
r.raise_for_status()
soup = BeautifulSoup(r.content, "lxml")
for tag in soup(['script','style','nav','footer','header','aside']): tag.decompose()
main = soup.find('article') or soup.find('main') or \
soup.find('div', class_=re.compile(r'content|article|post|body', re.I))
target = main if main else soup
text = target.get_text(separator='\n')
lines = [l.strip() for l in text.split('\n') if l.strip() and len(l.strip()) > 10]
return '\n'.join(lines[:200]) if lines else None
except Exception: return None
def search_candidates(self, query, target, label=""):
need = target * Config.FETCH_RESERVE_RATIO
valid: List[Dict] = []; attempt = 0; seen: Set[str] = set()
while len(valid) < need and attempt < Config.MAX_SEARCH_ATTEMPTS:
attempt += 1
raw = self._ddgs_search(query, max(need*2, Config.MAX_RESULTS_PER_SEARCH))
if not raw: raw = self._fallback_search(query, Config.MAX_RESULTS_PER_SEARCH)
if not raw: time.sleep(Config.SEARCH_DELAY); continue
for item in raw:
if item['url'].lower() in seen: continue
dup, _ = self.dedup.is_duplicate(item['url'], item['title'], item['content'])
if dup: continue
valid.append(item); seen.add(item['url'].lower())
if len(valid) >= need: break
time.sleep(Config.SEARCH_DELAY)
return valid
def search_with_fetch_fallback(self, query, target, label=""):
candidates = self.search_candidates(query, target, label)
if not candidates: return []
confirmed: List[Dict] = []
for i, src in enumerate(candidates):
if len(confirmed) >= target: break
full = self.fetch_full_content(src["url"])
if full and len(full) >= Config.MIN_FULL_CONTENT_LENGTH:
src["full_content"] = full; src["fetch_status"] = "full"; confirmed.append(src)
logger.info(f" โœ… [{label}] ({len(confirmed)}/{target}) ๅ…จๆ–‡ๆˆๅŠŸ {src['url'][:55]}")
elif len(src.get("content","")) >= Config.MIN_SNIPPET_FALLBACK_LEN:
src["full_content"] = src["content"]; src["fetch_status"] = "snippet_fallback"
confirmed.append(src)
logger.info(f" โš ๏ธ [{label}] ({len(confirmed)}/{target}) snippet้™็ดš {src['url'][:55]}")
else:
logger.info(f" โŒ [{label}] ่ทณ้Ž๏ผŒๅ‚™ๆดๆฑ ๅ‰ฉ {len(candidates)-i-1}")
return confirmed
def search_multi_query(self, queries, pages_per_query):
all_results: List[Dict] = []
with ThreadPoolExecutor(max_workers=Config.MAX_CONCURRENT_SEARCH) as ex:
futures = {ex.submit(self.search_with_fetch_fallback,q,pages_per_query,f"Q{i}"):q
for i,q in enumerate(queries)}
for f in as_completed(futures):
try: all_results.extend(f.result())
except Exception as e: logger.warning(f" โš ๏ธ ๆŸฅ่ฉขๅคฑๆ•—๏ผš{e}")
return all_results
# โ”€โ”€ ResearchPlanner โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
class ResearchPlanner:
def __init__(self, llm): self.llm = llm
def generate_plan(self, topic):
logger.info("๐Ÿ“‹ ็”Ÿๆˆ็ ”็ฉถ่จˆๅŠƒ...")
prompt = f"""ไฝ ๆ˜ฏไธ€ไฝๅฐˆๆฅญ็ ”็ฉถ่ฆๅŠƒๅธซใ€‚่ซ‹็‚บไปฅไธ‹ไธป้กŒๅˆถๅฎš็ ”็ฉถ่จˆๅŠƒใ€‚
็ ”็ฉถไธป้กŒ๏ผš{topic}
่ซ‹่ผธๅ‡บไปฅไธ‹ๆ ผๅผ๏ผˆๅšดๆ ผ้ตๅฎˆ๏ผŒๆฏ่กŒไธ€ๆข๏ผ‰๏ผš
=== ๅญไธป้กŒ ===
[ๅˆ—ๅ‡บ {Config.NUM_SUBTOPICS} ๅ€‹้œ€่ฆๆทฑๅ…ฅ็ ”็ฉถ็š„ๅญไธป้กŒ๏ผŒๆฏ่กŒไธ€ๅ€‹]
=== ็ ”็ฉถๅ•้กŒ ===
[ๅˆ—ๅ‡บ {Config.NUM_RESEARCH_QUESTIONS} ๅ€‹ๅ…ท้ซ”็š„็ ”็ฉถๅ•้กŒ๏ผŒๆฏ่กŒไธ€ๅ€‹]
=== ๆœๅฐ‹ๆŸฅ่ฉข ===
[ๅˆ—ๅ‡บ {Config.NUM_SUBTOPICS * Config.QUERIES_PER_SUBTOPIC} ๆขๅคšๆจฃๅŒ–็š„ๆœๅฐ‹ๆŸฅ่ฉข๏ผˆ็น้ซ”/็ฐก้ซ”/่‹ฑๆ–‡ๆททๅˆ๏ผ‰๏ผŒๆฏ่กŒไธ€ๅ€‹]"""
raw = self.llm.chat(prompt, temperature=Config.PLAN_TEMPERATURE,
max_tokens=2000, call_type="research_plan",
preferred=Config.PREFERRED_LARGE_MODEL)
return self._parse_plan(raw, topic)
def _parse_section(self, text, header):
m = re.search(rf'=== {re.escape(header)} ===(.*?)(?====|$)', text, re.DOTALL)
if not m: return []
return [l.strip() for l in m.group(1).split('\n')
if l.strip() and not l.strip().startswith('[') and len(l.strip()) > 3]
def _parse_plan(self, raw, topic):
subtopics = self._parse_section(raw, "ๅญไธป้กŒ")
questions = self._parse_section(raw, "็ ”็ฉถๅ•้กŒ")
queries = self._parse_section(raw, "ๆœๅฐ‹ๆŸฅ่ฉข")
if not subtopics:
subtopics = [f"{topic}็š„ๆญทๅฒ่ƒŒๆ™ฏ",f"{topic}็š„ไธป่ฆ็‰นๅพต",
f"{topic}็š„ๅฝฑ้Ÿฟ่ˆ‡ๆ„็พฉ",f"{topic}็š„็™ผๅฑ•็พๆณ",f"{topic}็š„ๆœชไพ†ๅฑ•ๆœ›"]
if not questions:
questions = [f"{topic}ๆ˜ฏไป€้บผ๏ผŸ",f"{topic}็š„่ตทๆบ๏ผŸ",f"{topic}็š„้‡่ฆ็‰น้ปž๏ผŸ",f"{topic}็š„็คพๆœƒๅฝฑ้Ÿฟ๏ผŸ"]
if not queries:
queries = [topic] + subtopics[:Config.NUM_SUBTOPICS]
logger.info(f"โœ… ่จˆๅŠƒ๏ผš{len(subtopics)} ๅญไธป้กŒ๏ผŒ{len(questions)} ๅ•้กŒ๏ผŒ{len(queries)} ๆŸฅ่ฉข")
return {"subtopics":subtopics,"questions":questions,"queries":queries}
# โ”€โ”€ ResearchSystem โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
class ResearchSystem:
def __init__(self):
if not GRADIO_AVAILABLE: raise ImportError("gradio_client ๆœชๅฎ‰่ฃ")
self.tracker = TokenTracker()
self.dedup = ContentDeduplicator()
self.llm = MultiModelClient(Config.MODEL_POOL, self.tracker)
self.searcher = AggressiveSearcher(self.dedup)
self.planner = ResearchPlanner(self.llm)
self.scorer = SourceQualityScorer()
self.ctx_builder = SmartContextBuilder()
self.all_sources: List[Dict] = []
self._src_lock = threading.Lock()
def phase0_plan(self, topic):
logger.info("="*60); logger.info("๐Ÿ“Œ PHASE 0๏ผš็ ”็ฉถ่ฆๅŠƒ")
return self.planner.generate_plan(topic)
def phase1_search_and_fetch(self, topic, plan):
logger.info("="*60); logger.info("๐Ÿ“Œ PHASE 1๏ผšๆœๅฐ‹๏ผ‹ๅ‚™ๆดๆŠ“ๅ–๏ผ‹่ฉ•ๅˆ†")
all_q = [topic] + plan["queries"]
seen_q: Set[str] = set(); uq = []
for q in all_q:
if q.lower() not in seen_q: seen_q.add(q.lower()); uq.append(q)
logger.info(f" ๐Ÿ” ไธฆ่กŒๅŸท่กŒ {len(uq)} ๆขๆŸฅ่ฉข...")
raw = self.searcher.search_multi_query(uq, Config.PAGES_PER_QUERY)
logger.info(f" ๐Ÿ“„ ๅ‚™ๆดๆŠ“ๅ–ๅพŒ๏ผš{len(raw)} ็ฏ‡")
if not raw: raise RuntimeError("โŒ Phase 1 ๆœๅฐ‹็„ก็ตๆžœ")
scored = self.scorer.batch_score(raw, topic)
quality = [(s,sc) for s,sc in scored if sc >= Config.SOURCE_QUALITY_THRESHOLD]
quality.sort(key=lambda x: -x[1])
filtered = [s for s,_ in quality] or raw
logger.info(f" โœ… ๅ“่ณช้Žๆฟพ๏ผš{len(filtered)}/{len(raw)} ็ฏ‡ไฟ็•™")
with self._src_lock: self.all_sources.extend(filtered)
return filtered
def phase2_subtopic_analysis(self, topic, sources, plan):
logger.info("="*60); logger.info("๐Ÿ“Œ PHASE 2๏ผšๅญไธป้กŒๆทฑๅบฆๅˆ†ๆž")
summaries: Dict[str,str] = {}
def analyze(st):
ctx = self.ctx_builder.build_ranked_context(sources, f"{topic} {st}", top_k=Config.TOP_K_CHUNKS)
lo = int(Config.SUMMARY_LENGTH*(1-Config.SUMMARY_LENGTH_FLEXIBILITY))
hi = int(Config.SUMMARY_LENGTH*(1+Config.SUMMARY_LENGTH_FLEXIBILITY))
prompt = f"""ไฝ ๆ˜ฏไธ€ไฝๅฐˆๆฅญ็ ”็ฉถๅ“กใ€‚ๆทฑๅบฆๅˆ†ๆžไธป้กŒ๏ผš{topic} โ”€ ๅญไธป้กŒ๏ผš{st}
ใ€ๅˆ†ๆž่ฆๆฑ‚ใ€‘
1. ๅ…ท้ซ”่ซ–้ปž + ่ณ‡ๆ–™ไบ‹ๅฏฆๆ”ฏๆ’
2. ๅˆ†ๆžๆทฑๅบฆ๏ผš{lo}-{hi} ๅญ—๏ผŒ็น้ซ”ไธญๆ–‡
3. ็ตๆง‹๏ผš่ƒŒๆ™ฏโ†’ๆ ธๅฟƒโ†’็ดฐ็ฏ€โ†’ๅฐ็ต
4. ๆŒ‡ๅ‡บไธๅŒ่ง€้ปžๆˆ–็ˆญ่ญฐ๏ผˆๅฆ‚ๆœ‰๏ผ‰
ใ€็›ธ้—œ่ณ‡ๆ–™็‰‡ๆฎตใ€‘
{ctx}
่ซ‹่ผธๅ‡บ้‡ๅฐใ€Œ{st}ใ€็š„ๆทฑๅบฆๅˆ†ๆž๏ผš"""
r = self.llm.chat(prompt, temperature=Config.SUMMARY_TEMPERATURE,
max_tokens=Config.MAX_NEW_TOKENS, call_type=f"subtopic_{st[:20]}")
logger.info(f" โœ… ๅญไธป้กŒๅฎŒๆˆ๏ผš{st[:30]} ({len(r)} ๅญ—ๅ…ƒ)")
return st, r
with ThreadPoolExecutor(max_workers=Config.MAX_CONCURRENT_LLM_CALLS) as ex:
futures = {ex.submit(analyze, st): st for st in plan["subtopics"]}
for f in as_completed(futures):
try: st, r = f.result(); summaries[st] = r
except Exception as e: logger.warning(f" โš ๏ธ {futures[f]}๏ผš{e}")
logger.info(f"[Phase 2] โœ… ๅฎŒๆˆ {len(summaries)} ๅ€‹ๅญไธป้กŒ")
return summaries
def phase3_fact_validation(self, topic, subtopic_summaries, sources):
if not Config.ENABLE_FACT_VALIDATION: return ""
logger.info("="*60); logger.info("๐Ÿ“Œ PHASE 3๏ผšไบ‹ๅฏฆ้ฉ—่ญ‰")
top2 = dict(list(subtopic_summaries.items())[:2])
st = "\n\n".join([f"ใ€{k}ใ€‘\n{v[:600]}" for k,v in top2.items()])
ctx = self.ctx_builder.build_ranked_context(sources, topic, top_k=4)
prompt = f"""่ซ‹ๆ ธๆŸฅไปฅไธ‹็ ”็ฉถๆ‘˜่ฆ็š„ๆบ–็ขบๆ€งใ€‚
ไธป้กŒ๏ผš{topic}
ใ€ๅพ…ๆ ธๆŸฅๅ…งๅฎนใ€‘
{st}
ใ€ๅŽŸๅง‹่ณ‡ๆ–™ใ€‘
{ctx[:2000]}
่ซ‹ๅˆ—ๅ‡บ๏ผš
1. ๅทฒ็ขบ่ช็š„้‡่ฆไบ‹ๅฏฆ๏ผˆ3-5ๆข๏ผ‰
2. ๆœ‰็–‘ๅ•ๆˆ–้œ€่ฃœๅ……็š„ๅœฐๆ–น๏ผˆ2-3ๆข๏ผ‰
ๆ ผๅผ๏ผšโœ… ็ขบ่ช๏ผš[ไบ‹ๅฏฆ] / โš ๏ธ ๅพ…ๆŸฅ๏ผš[ๅ•้กŒ]"""
r = self.llm.chat(prompt, temperature=Config.CRITIQUE_TEMPERATURE,
max_tokens=Config.MAX_NEW_TOKENS, call_type="fact_validation")
logger.info(f"[Phase 3] โœ… ้ฉ—่ญ‰ๅฎŒๆˆ ({len(r)} ๅญ—ๅ…ƒ)")
return r
def _build_outline(self, topic, plan):
st = plan["subtopics"]; qs = plan["questions"]
roman = ["ไธ€","ไบŒ","ไธ‰","ๅ››","ไบ”","ๅ…ญ","ไธƒ","ๅ…ซ","ไน","ๅ","ๅไธ€","ๅไบŒ","ๅไธ‰","ๅๅ››","ๅไบ”"]
out = []
out.append({"id":"abstract","heading":"## ไธ€ใ€ๅŸท่กŒๆ‘˜่ฆ","title":"ๅŸท่กŒๆ‘˜่ฆ","query":topic,
"template":f"่ซ‹็‚บไธป้กŒใ€Œ{topic}ใ€ๆ’ฐๅฏซๅญธ่ก“ๆ€งๅŸท่กŒๆ‘˜่ฆใ€‚\nๆถต่“‹๏ผš็ ”็ฉถ่ƒŒๆ™ฏใ€ๆ ธๅฟƒ็™ผ็พใ€ไธป่ฆ็ต่ซ–ใ€‚\n็ด„ {Config.SECTION_TARGET_WORDS} ๅญ—๏ผŒๆฎต่ฝๆ•˜่ฟฐใ€‚",
"top_k":4,"max_tokens":Config.SECTION_MAX_TOKENS})
out.append({"id":"background","heading":"## ไบŒใ€็ ”็ฉถ่ƒŒๆ™ฏ่ˆ‡็›ฎ็š„","title":"็ ”็ฉถ่ƒŒๆ™ฏ่ˆ‡็›ฎ็š„",
"query":f"{topic} ๆญทๅฒ่ƒŒๆ™ฏ่ตทๆบ",
"template":f"่ซ‹่ชชๆ˜Žใ€Œ{topic}ใ€็š„็ ”็ฉถ่ƒŒๆ™ฏ๏ผš\n1. ่ตทๆบ่ˆ‡็™ผๅฑ•่„ˆ็ตก\n2. ็ ”็ฉถ้‡่ฆๆ€ง\n3. ๆœฌๅ ฑๅ‘Š็ ”็ฉถ็›ฎ็š„\n็ด„ {Config.SECTION_TARGET_WORDS} ๅญ—ใ€‚",
"top_k":5,"max_tokens":Config.SECTION_MAX_TOKENS})
for i,s in enumerate(st):
rn = roman[i+2] if i+2 < len(roman) else str(i+3)
out.append({"id":f"subtopic_{i}","heading":f"## {rn}ใ€{s}","title":s,
"query":f"{topic} {s}",
"template":f"่ซ‹ๆทฑๅ…ฅๅˆ†ๆžใ€Œ{topic}ใ€ไธญ้—œๆ–ผใ€Œ{s}ใ€็š„้ขๅ‘ใ€‚\n1. ๅ…ท้ซ”่ซ–้ปž+่ณ‡ๆ–™ไบ‹ๅฏฆ\n2. ้‡่ฆๆ€ง่ˆ‡ๅฝฑ้Ÿฟ\n3. ไธๅŒ่ง€้ปž๏ผˆๅฆ‚ๆœ‰๏ผ‰\n็ด„ {Config.SECTION_TARGET_WORDS} ๅญ—๏ผŒๅญธ่ก“่ซ–ๆ–‡่ชžๆฐฃใ€‚",
"top_k":Config.TOP_K_CHUNKS,"max_tokens":Config.SECTION_MAX_TOKENS})
rn_qa = roman[len(st)+2] if len(st)+2 < len(roman) else "ๅ•้กŒ"
ql = "\n".join([f"โ€ข {q}" for q in qs[:5]])
out.append({"id":"qa","heading":f"## {rn_qa}ใ€ๆ ธๅฟƒๅ•้กŒ่งฃ็ญ”","title":"ๆ ธๅฟƒๅ•้กŒ่งฃ็ญ”","query":topic,
"template":f"่ซ‹้‡ๅฐไปฅไธ‹็ ”็ฉถๅ•้กŒ๏ผŒ้€ไธ€็ฐกๆ˜Žๅ›ž็ญ”๏ผš\n{ql}\nๆฏ้กŒ 2-4 ๅฅ๏ผŒ็ธฝ่จˆ็ด„ {Config.SECTION_TARGET_WORDS} ๅญ—ใ€‚",
"top_k":5,"max_tokens":Config.SECTION_MAX_TOKENS})
rn_d = roman[len(st)+3] if len(st)+3 < len(roman) else "่จŽ่ซ–"
out.append({"id":"discussion","heading":f"## {rn_d}ใ€็ถœๅˆ่จŽ่ซ–","title":"็ถœๅˆ่จŽ่ซ–",
"query":f"{topic} ๅฝฑ้Ÿฟ ๅˆ†ๆž",
"template":f"่ซ‹ๅฐใ€Œ{topic}ใ€ๅ„้ขๅ‘้€ฒ่กŒ็ถœๅˆ่จŽ่ซ–๏ผš\n1. ๅ„ๅญไธป้กŒ้–“็›ธไบ’้—œไฟ‚\n2. ็ ”็ฉถไพท้™่ˆ‡ๆœช่งฃๅ•้กŒ\n3. ่ˆ‡็›ธ้—œ้ ˜ๅŸŸๆฏ”่ผƒ\n็ด„ {Config.SECTION_TARGET_WORDS} ๅญ—ใ€‚",
"top_k":5,"max_tokens":Config.SECTION_MAX_TOKENS})
rn_c = roman[len(st)+4] if len(st)+4 < len(roman) else "็ต่ซ–"
out.append({"id":"conclusion","heading":f"## {rn_c}ใ€็ต่ซ–่ˆ‡ๆœชไพ†ๅฑ•ๆœ›","title":"็ต่ซ–่ˆ‡ๆœชไพ†ๅฑ•ๆœ›",
"query":f"{topic} ็ต่ซ– ๆœชไพ† ๅฑ•ๆœ›",
"template":f"่ซ‹ๆ’ฐๅฏซใ€Œ{topic}ใ€็ ”็ฉถ็š„็ต่ซ–๏ผš\n1. ๆ ธๅฟƒ็™ผ็พๆ‘˜่ฆ๏ผˆ3-4้ปž๏ผ‰\n2. ๆœชไพ†็™ผๅฑ•ๆ–นๅ‘\n3. ๅฐ่ฎ€่€…็š„ๅปบ่ญฐ\n็ด„ {Config.SECTION_TARGET_WORDS} ๅญ—ใ€‚",
"top_k":4,"max_tokens":Config.SECTION_MAX_TOKENS})
return out
def _generate_section(self, section, topic, sources, prev_summary, validation_notes):
ctx = self.ctx_builder.build_ranked_context(sources, section["query"], top_k=section["top_k"])
ctx = ctx[:Config.CONTEXT_MAX_LENGTH-800]
prev = f"\nใ€ๅ‰็ฏ€ๅทฒๆถต่“‹๏ผˆๆœฌ็ฏ€่ซ‹ๅ‹ฟ้‡่ค‡๏ผ‰ใ€‘\n{prev_summary[:300]}\n" if prev_summary else ""
val = f"\nใ€ๅทฒ้ฉ—่ญ‰้—œ้ตไบ‹ๅฏฆใ€‘\n{validation_notes[:400]}\n" \
if validation_notes and section["id"] in ("abstract","background") else ""
prompt = f"""ไฝ ๆ˜ฏไธ€ไฝๅฐˆๆฅญๅญธ่ก“็ ”็ฉถๅ“ก๏ผŒๆญฃๅœจๆ’ฐๅฏซ้—œๆ–ผใ€Œ{topic}ใ€็š„ๅฐ่ซ–ๆ–‡ใ€‚
{prev}{val}
ใ€ๅƒ่€ƒ่ณ‡ๆ–™๏ผˆ่ซ‹ๅพžไธญๆๅ–ๅ…ท้ซ”ไบ‹ๅฏฆ๏ผ‰ใ€‘
{ctx}
ใ€ๆœฌ็ฏ€ๆ’ฐๅฏซไปปๅ‹™ใ€‘
{section["template"]}
ๆณจๆ„๏ผš
- ไฝฟ็”จ็น้ซ”ไธญๆ–‡๏ผŒๆฎต่ฝๅผๅญธ่ก“ๅฏซไฝœ
- ๅฟ…้ ˆๅผ•็”จ่ณ‡ๆ–™ไธญ็š„ๅ…ท้ซ”ๅ…งๅฎน๏ผˆไบบๅใ€ๆ•ธๅญ—ใ€ไบ‹ไปถ๏ผ‰
- ไธ่ฆ้‡่ค‡ๅ‰็ฏ€๏ผŒไธ่ฆ่ผธๅ‡บๆจ™้กŒ่กŒ
้–‹ๅง‹ๆ’ฐๅฏซ๏ผš"""
r = self.llm.chat(prompt, temperature=Config.REPORT_TEMPERATURE,
max_tokens=section["max_tokens"], call_type=f"section_{section['id']}")
lines = r.strip().split("\n")
cleaned = [l for l in lines if not (l.strip().startswith("#") and section["title"] in l)]
return "\n".join(cleaned).strip()
def phase4_generate_report(self, topic, plan, subtopic_summaries, validation_notes):
logger.info("="*60); logger.info("๐Ÿ“Œ PHASE 4๏ผšๅˆ†็ฏ€ๆจกๆฟๅผๅ ฑๅ‘Š็”Ÿๆˆ")
outline = self._build_outline(topic, plan); total = len(outline)
logger.info(f" ๐Ÿ“‹ ๅคง็ถฑ๏ผš{total} ็ฏ€")
sections_output: List[Tuple[str,str]] = []; prev = ""
for i, sec in enumerate(outline, 1):
logger.info(f" โœ๏ธ [{i}/{total}] {sec['title']}")
aug = list(self.all_sources)
for st, summ in subtopic_summaries.items():
aug.append({"url":f"internal://{st}","title":f"ๅญไธป้กŒ๏ผš{st}",
"content":summ,"full_content":summ,"fetch_status":"full"})
content = self._generate_section(sec, topic, aug, prev, validation_notes)
if not content or content.startswith("[้Œฏ่ชค]"):
content = subtopic_summaries.get(sec["title"], f"๏ผˆ{sec['title']} ็”Ÿๆˆๅคฑๆ•—๏ผ‰")
sections_output.append((sec["heading"], content))
logger.info(f" โœ… [{i}/{total}] {sec['title']} ({len(content)} ๅญ—ๅ…ƒ)")
prev = content[:300]
body = f"# {topic}๏ผšๆทฑๅบฆ็ ”็ฉถๅ ฑๅ‘Š\n" + "".join(f"\n{h}\n\n{c}" for h,c in sections_output)
body = self._strip_llm_references(body)
logger.info(f"[Phase 4] โœ… {total} ็ฏ€๏ผŒ{sum(len(c) for _,c in sections_output):,} ๅญ—ๅ…ƒ")
return body
def _build_references_section(self, sources):
seen: Set[str] = set(); unique: List[Dict] = []
for s in sources:
if s["url"].lower().strip() not in seen:
unique.append(s); seen.add(s["url"].lower().strip())
today = datetime.now().strftime("%Y-%m-%d")
lines = ["","---","","## ๅƒ่€ƒ่ณ‡ๆ–™","",
f"> ๆœฌๅ ฑๅ‘Šๅ…ฑๅผ•็”จ {len(unique)} ็ญ†่ณ‡ๆ–™ไพ†ๆบ๏ผŒๅญ˜ๅ–ๆ—ฅๆœŸ๏ผš{today}",""]
for i, src in enumerate(unique, 1):
title = src.get("title","๏ผˆ็„กๆจ™้กŒ๏ผ‰").strip()
url = src.get("url","").strip()
try: site = re.sub(r'^www\.', '', urlparse(url).netloc.lower())
except Exception: site = url
lines += [f"[{i}] **{title}**", f" - ไพ†ๆบ็ถฒ็ซ™๏ผš{site}",
f" - ้€ฃ็ต๏ผš<{url}>", f" - ๅญ˜ๅ–ๆ—ฅๆœŸ๏ผš{today}", ""]
return "\n".join(lines), unique
def _strip_llm_references(self, report):
patterns = [r'\n#{1,3}\s*(ๅƒ่€ƒๆ–‡็ป|ๅƒ่€ƒ่ณ‡ๆ–™|References|Bibliography|ๅผ•็”จไพ†ๆบ)[^\n]*',
r'\n\*\*(ๅƒ่€ƒๆ–‡็ป|ๅƒ่€ƒ่ณ‡ๆ–™|References)\*\*[^\n]*']
earliest = len(report)
for pat in patterns:
m = re.search(pat, report, re.IGNORECASE)
if m and m.start() < earliest: earliest = m.start()
return report[:earliest] if earliest < len(report) else report
def _save(self, path, content):
try:
with open(path, 'w', encoding='utf-8') as f: f.write(content)
except Exception as e: logger.warning(f"โš ๏ธ ๅ„ฒๅญ˜ๅคฑๆ•— {path}๏ผš{e}")
def execute_gui(self, topic: str, work_dir: str) -> Dict:
os.makedirs(work_dir, exist_ok=True)
start = time.time(); logger.info(f"๐Ÿš€ ้–‹ๅง‹็ ”็ฉถ๏ผš'{topic}'")
try:
plan = self.phase0_plan(topic)
self._save(os.path.join(work_dir,"phase0_plan.txt"),
"# ็ ”็ฉถ่จˆๅŠƒ\n\n## ๅญไธป้กŒ\n"+"\n".join(f"- {s}" for s in plan["subtopics"])+
"\n\n## ็ ”็ฉถๅ•้กŒ\n"+"\n".join(f"- {q}" for q in plan["questions"])+
"\n\n## ๆœๅฐ‹ๆŸฅ่ฉข\n"+"\n".join(f"- {q}" for q in plan["queries"]))
sources = self.phase1_search_and_fetch(topic, plan)
self._save(os.path.join(work_dir,"phase1_sources.txt"),
"\n".join(f"{i}. {s['title'][:60]} | {s['url']}" for i,s in enumerate(sources,1)))
subtopic_summaries = self.phase2_subtopic_analysis(topic, sources, plan)
self._save(os.path.join(work_dir,"phase2_subtopics.txt"),
"\n\n".join(f"## {k}\n{v}" for k,v in subtopic_summaries.items()))
validation = self.phase3_fact_validation(topic, subtopic_summaries, sources)
if validation: self._save(os.path.join(work_dir,"phase3_validation.txt"), validation)
report_body = self.phase4_generate_report(topic, plan, subtopic_summaries, validation)
refs_section, unique_sources = self._build_references_section(sources)
report = report_body.rstrip() + "\n\n" + refs_section
elapsed = time.time() - start
total_tok = self.tracker.total()
header = (f"# ๐Ÿ“Š ็ ”็ฉถๅ ฑๅ‘Š๏ผš{topic}\n\n"
f"> **ๆ™‚้–“**๏ผš{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
f"> **่€—ๆ™‚**๏ผš{elapsed:.1f} ็ง’\n"
f"> **็ธฝ Token**๏ผš{total_tok:,}\n"
f"> **ไพ†ๆบ**๏ผš{len(unique_sources)} ็ญ†\n"
f"> **ๅญไธป้กŒ**๏ผš{len(subtopic_summaries)} ๅ€‹\n\n---\n\n")
full_report = header + report
report_path = os.path.join(work_dir, "report_final.md")
self._save(report_path, full_report)
self._save(os.path.join(work_dir,"token_usage.txt"),
f"# Token\n{self.tracker.detailed_report()}")
logger.info(f"โœ… ๅฎŒๆˆ๏ผ{elapsed:.1f}s | Token:{total_tok:,}")
logger.info(self.llm.health_report())
return {"success":True,"report":full_report,"elapsed":elapsed,
"total_tok":total_tok,"sources_count":len(unique_sources),
"work_dir":work_dir,"subtopics":list(subtopic_summaries.keys()),
"queries":plan.get("queries",[])}
except Exception as e:
import traceback; tb = traceback.format_exc()
logger.critical(f"โŒ ็ต‚ๆญข๏ผš{e}\n{tb}")
return {"success":False,"error":str(e),"work_dir":work_dir}
# =============================================================================
# 12. HistoryManager
# =============================================================================
HISTORY_ROOT = "./history"
HISTORY_INDEX = os.path.join(HISTORY_ROOT, "index.json")
_hist_lock = threading.Lock()
def _ensure_history_dir():
os.makedirs(HISTORY_ROOT, exist_ok=True)
if not os.path.exists(HISTORY_INDEX):
with open(HISTORY_INDEX, 'w', encoding='utf-8') as f: json.dump([], f)
def _load_index() -> List[Dict]:
_ensure_history_dir()
try:
with open(HISTORY_INDEX, 'r', encoding='utf-8') as f: return json.load(f)
except Exception: return []
def _save_index(index: List[Dict]):
_ensure_history_dir()
with open(HISTORY_INDEX, 'w', encoding='utf-8') as f:
json.dump(index, f, ensure_ascii=False, indent=2)
def save_to_history(topic: str, result: Dict):
with _hist_lock:
index = _load_index()
sid = str(uuid.uuid4())[:8]
entry = {
"id": sid,
"topic": topic,
"timestamp": datetime.now().isoformat(),
"elapsed": result.get("elapsed", 0),
"sources": result.get("sources_count", 0),
"tok": result.get("total_tok", 0),
"work_dir": result.get("work_dir",""),
"keywords": _extract_keywords(topic, result),
}
index.insert(0, entry)
_save_index(index)
return sid
def _extract_keywords(topic: str, result: Dict) -> List[str]:
words: Set[str] = set()
def _add(text: str):
chs = re.findall(r'[\u4e00-\u9fff]', text)
words.update(chs)
words.update(chs[i]+chs[i+1] for i in range(len(chs)-1))
words.update(w.lower() for w in re.findall(r'[a-zA-Z]{2,}', text))
_add(topic)
for st in result.get("subtopics", []): _add(st)
for q in result.get("queries", []): _add(q)
return sorted(words)
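# ── Illustrative sketch (hypothetical helper, never called by the app):
# history keywords are CJK unigrams and bigrams plus lowercased Latin words.
# Spaces are stripped before the bigrams are formed, so bigrams can cross
# word boundaries ('北研' below).
def _keyword_demo():
    print(_extract_keywords("台北 AI 研究", {"subtopics": [], "queries": []}))
    # -> ['ai', '北', '北研', '台', '台北', '研', '研究', '究']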
def fuzzy_search_history(query: str, threshold: float = 0.3) -> List[Dict]:
index = _load_index()
if not query.strip(): return index
q_words: Set[str] = set()
chs = re.findall(r'[\u4e00-\u9fff]', query)
q_words.update(chs)
q_words.update(chs[i]+chs[i+1] for i in range(len(chs)-1))
q_words.update(w.lower() for w in re.findall(r'[a-zA-Z]{2,}', query))
if not q_words: return index
scored = []
for entry in index:
kw_set = set(entry.get("keywords", []))
if not kw_set: continue
hits = len(q_words & kw_set)
coverage = hits / len(q_words)
if query.strip() in entry.get("topic", ""):
coverage = max(coverage, 1.0)
if coverage >= threshold:
scored.append((coverage, entry))
scored.sort(key=lambda x: -x[0])
return [e for _, e in scored]
def make_zip(work_dirs: List[str], zip_name: str) -> str:
os.makedirs("./zips", exist_ok=True)
zip_path = f"./zips/{zip_name}.zip"
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
for wd in work_dirs:
if not os.path.isdir(wd): continue
folder_name = os.path.basename(wd)
for fname in os.listdir(wd):
fpath = os.path.join(wd, fname)
if os.path.isfile(fpath):
zf.write(fpath, arcname=f"{folder_name}/{fname}")
return zip_path
# =============================================================================
# 13. ResearchRunner + ResearchQueue
# =============================================================================
PENDING_QUEUE_FILE = os.path.join(HISTORY_ROOT, "_pending_queue.json")
def _save_pending_to_disk(pending: List[str]):
try:
_ensure_history_dir()
with open(PENDING_QUEUE_FILE, 'w', encoding='utf-8') as f:
json.dump({"pending": pending, "saved_at": datetime.now().isoformat()}, f)
except Exception as e:
logger.warning(f"ๅพ…่พฆไฝ‡ๅˆ—ๅญ˜ๆช”ๅคฑๆ•—๏ผš{e}")
def _load_pending_from_disk() -> List[str]:
try:
if os.path.exists(PENDING_QUEUE_FILE):
with open(PENDING_QUEUE_FILE, 'r', encoding='utf-8') as f:
data = json.load(f)
pending = data.get("pending", [])
if pending:
logger.info(f"ๅพž็ฃ็ขŸๆขๅพฉๅพ…่พฆไฝ‡ๅˆ—๏ผš{len(pending)} ๅ€‹")
return pending
except Exception as e:
logger.warning(f"ๅพ…่พฆไฝ‡ๅˆ—่ฎ€ๅ–ๅคฑๆ•—๏ผš{e}")
return []
class ResearchRunner:
def __init__(self):
self._lock = threading.Lock()
self.running = False
self.status = "idle"
self.result: Optional[Dict] = None
self.topic = ""
self.zip_path: Optional[str] = None
def start(self, topic: str) -> bool:
with self._lock:
if self.running: return False
self.running = True; self.result = None
self.status = "running"; self.topic = topic; self.zip_path = None
_LOG_QUEUE.put(_SESSION_SENTINEL)
threading.Thread(target=self._run, args=(topic,), daemon=True).start()
return True
def _run(self, topic: str):
sid = str(uuid.uuid4())[:8]
topic_safe = re.sub(r'[^\w]', '_', topic)[:20]  # note: r'[^\w]', not the double-escaped r'[^\\w]'
work_dir = os.path.join(HISTORY_ROOT, f"{sid}_{topic_safe}")
try:
sys_inst = ResearchSystem()
result = sys_inst.execute_gui(topic, work_dir)
if result["success"]:
save_to_history(topic, result)
self.zip_path = make_zip([work_dir], f"{sid}_{topic_safe}")
with self._lock:
self.result = result
self.status = "done" if result["success"] else "error"
self.running = False
except Exception as e:
logger.critical(f"Runner ๅคฑๆ•—๏ผš{e}")
with self._lock:
self.result = {"success": False, "error": str(e), "work_dir": work_dir}
self.status = "error"; self.running = False
def get_state(self) -> Dict:
with self._lock:
return {"running": self.running, "status": self.status,
"result": self.result, "topic": self.topic,
"zip_path": self.zip_path}
class ResearchQueue:
def __init__(self):
self._lock = threading.Lock()
self._pending: List[str] = _load_pending_from_disk()
self._runner = ResearchRunner()
threading.Thread(target=self._watcher, daemon=True).start()
def enqueue(self, topic: str) -> str:
topic = topic.strip()
if not topic: return "โš ๏ธ ่ซ‹่ผธๅ…ฅ็ ”็ฉถไธป้กŒ"
with self._lock:
state = self._runner.get_state()
if not state["running"] and not self._pending:
self._runner.start(topic)
_save_pending_to_disk(self._pending)
return f"๐ŸŸข ็›ดๆŽฅ้–‹ๅง‹็ ”็ฉถ๏ผš{topic}"
else:
self._pending.append(topic)
_save_pending_to_disk(self._pending)
return f"โœ… ๅทฒๅŠ ๅ…ฅไฝ‡ๅˆ—๏ผˆ็ฌฌ {len(self._pending)} ไฝ็ญ‰ๅพ…๏ผ‰๏ผš{topic}"
def remove(self, idx: int) -> str:
with self._lock:
if 0 <= idx < len(self._pending):
removed = self._pending.pop(idx)
_save_pending_to_disk(self._pending)
return f"๐Ÿ—‘ ๅทฒ็งป้™ค๏ผš{removed}"
return "โš ๏ธ ็„กๆ•ˆ็š„็ดขๅผ•"
def get_full_state(self) -> Dict:
with self._lock:
return {"runner": self._runner.get_state(),
"pending": list(self._pending)}
def _watcher(self):
while True:
time.sleep(2)
with self._lock:
state = self._runner.get_state()
if not state["running"] and self._pending:
next_topic = self._pending.pop(0)
_save_pending_to_disk(self._pending)
self._runner.start(next_topic)
_queue = ResearchQueue()
# =============================================================================
# 14. ๆญทๅฒ็ด€้Œ„ๆ“ไฝœๅ‡ฝๅผ๏ผˆ็ด” Python๏ผŒ็ตฆ Gradio ๅ›žๅ‘ผ็”จ๏ผ‰
# =============================================================================
def _entry_to_row(e: Dict) -> List:
"""ๆŠŠ index entry ่ฝ‰ๆˆ Dataframe ไธ€่กŒ"""
ts = e.get("timestamp","")[:16].replace("T"," ")
tok = e.get("tok", 0)
tok_str = f"{tok:,}" if tok else "0"
return [
e.get("id",""),
e.get("topic",""),
ts,
e.get("sources", 0),
f"{round(e.get('elapsed', 0))}s",
tok_str,
]
def _build_dropdown_choices(entries: List[Dict]) -> List[str]:
return [f"[{e.get('id','')}] {e.get('topic','')}" for e in entries]
def _id_from_choice(choice: str) -> str:
"""ๅพž '[abc1] ไธป้กŒ' ๆๅ– id"""
m = re.match(r'^\[([^\]]+)\]', choice or "")
return m.group(1) if m else ""
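# ── Illustrative sketch (hypothetical helper, never called by the app):
# the Dropdown labels embed the record id in leading square brackets, and
# _id_from_choice recovers it for lookups in the index.
def _choice_roundtrip_demo():
    label = _build_dropdown_choices([{"id": "ab12", "topic": "示例主題"}])[0]
    assert label == "[ab12] 示例主題"
    assert _id_from_choice(label) == "ab12"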
# โ”€โ”€ Tab 2 ๅ›žๅ‘ผ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
def cb_hist_load(search_query: str = ""):
"""่ผ‰ๅ…ฅ/ๆœๅฐ‹ๆญทๅฒ๏ผŒๆ›ดๆ–ฐ Dataframe ่ˆ‡ Dropdown"""
entries = fuzzy_search_history(search_query) if search_query.strip() else _load_index()
rows = [_entry_to_row(e) for e in entries]
choices = _build_dropdown_choices(entries)
status = f"ๅ…ฑ **{len(entries)}** ็ญ†็ด€้Œ„" + (f"๏ผˆๆœๅฐ‹๏ผš{search_query}๏ผ‰" if search_query.strip() else "")
return (
gr.update(value=rows if rows else []),
gr.update(choices=choices, value=None),
gr.update(value=status),
gr.update(value=""), # ๆธ…็ฉบ้ ่ฆฝ
gr.update(visible=False), # ้šฑ่—ไธ‹่ผ‰
)
def cb_hist_preview(choice: str):
"""้ ่ฆฝ้ธๅ–็š„ๅ ฑๅ‘Š"""
entry_id = _id_from_choice(choice)
if not entry_id:
return gr.update(value="โš ๏ธ ่ซ‹ๅ…ˆๅœจไธ‹ๆ‹‰้ธๅ–ฎไธญ้ธๆ“‡ไธ€็ญ†็ด€้Œ„"), gr.update(visible=False)
index = _load_index()
entry = next((e for e in index if e.get("id") == entry_id), None)
if not entry:
return gr.update(value="โš ๏ธ ๆ‰พไธๅˆฐๆญค็ด€้Œ„"), gr.update(visible=False)
wd = entry.get("work_dir","")
rep_path = os.path.join(wd, "report_final.md") if wd else ""
if not rep_path or not os.path.exists(rep_path):
return gr.update(value="โš ๏ธ ๆ‰พไธๅˆฐๅ ฑๅ‘Šๆช”ๆกˆ"), gr.update(visible=False)
with open(rep_path, 'r', encoding='utf-8') as f:
content = f.read()
return gr.update(value=content), gr.update(visible=False)
def cb_hist_download_one(choice: str):
"""ไธ‹่ผ‰ๅ–ฎ็ญ† ZIP๏ผŒๅ›žๅ‚ณๆช”ๆกˆ่ทฏๅพ‘็ตฆ gr.File"""
entry_id = _id_from_choice(choice)
if not entry_id:
return gr.update(visible=False, value=None)
index = _load_index()
entry = next((e for e in index if e.get("id") == entry_id), None)
if not entry:
return gr.update(visible=False, value=None)
wd = entry.get("work_dir","")
if not wd or not os.path.isdir(wd):
return gr.update(visible=False, value=None)
topic_safe = re.sub(r'[^\w]','_', entry.get("topic","unknown"))[:20]
zip_path = make_zip([wd], f"{entry_id}_{topic_safe}")
return gr.update(visible=True, value=zip_path)
def cb_hist_download_all():
"""ๆ‰“ๅŒ…ๅ…จ้ƒจๆญทๅฒ็‚บ ZIP"""
entries = _load_index()
work_dirs = [e.get("work_dir","") for e in entries if e.get("work_dir","")]
if not work_dirs:
return gr.update(visible=False, value=None)
stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
zip_path = make_zip(work_dirs, f"all_history_{stamp}")
return gr.update(visible=True, value=zip_path)
# =============================================================================
# 15. Gradio UI
# =============================================================================
import gradio as gr
DARK_CSS = """
body, .gradio-container {
background-color: #0d1117 !important;
color: #c9d1d9 !important;
font-family: 'Inter', 'Segoe UI', sans-serif;
}
.tab-nav button {
background: #161b22 !important; color: #8b949e !important;
border: 1px solid #30363d !important; border-radius: 8px 8px 0 0 !important;
font-size: 14px !important; padding: 8px 20px !important;
}
.tab-nav button.selected, .tab-nav button:hover {
background: #1f6feb !important; color: #fff !important;
border-color: #1f6feb !important;
}
.gr-box, .gradio-box, .block, .form {
background: #161b22 !important; border: 1px solid #30363d !important;
border-radius: 10px !important;
}
input[type=text], textarea, .gr-input, .gr-textarea {
background: #0d1117 !important; color: #c9d1d9 !important;
border: 1px solid #30363d !important; border-radius: 8px !important;
}
input[type=text]:focus, textarea:focus {
border-color: #1f6feb !important;
box-shadow: 0 0 0 2px rgba(31,111,235,0.25) !important;
}
button.primary, .gr-button-primary {
background: linear-gradient(135deg,#1f6feb,#388bfd) !important;
color: #fff !important; border: none !important; border-radius: 8px !important;
font-weight: 600 !important; padding: 10px 22px !important;
}
button.primary:hover { opacity:.88; }
button.secondary, .gr-button-secondary {
background: #21262d !important; color: #c9d1d9 !important;
border: 1px solid #30363d !important; border-radius: 8px !important;
}
button.secondary:hover { border-color: #388bfd !important; }
.gr-markdown, .prose {
background: #161b22 !important; color: #c9d1d9 !important;
padding: 16px !important; border-radius: 10px !important;
border: 1px solid #30363d !important;
max-height: 520px; overflow-y: auto; line-height: 1.7;
}
.prose h1,.prose h2,.prose h3 { color: #79c0ff !important; }
.prose a { color: #58a6ff !important; }
.prose code { background:#0d1117 !important; color:#ffa657 !important;
border-radius:4px; padding:2px 6px; }
.prose blockquote { border-left:3px solid #388bfd; padding-left:12px; color:#8b949e; }
.prose hr { border-color:#30363d; }
#log_box textarea {
background: #010409 !important; color: #3fb950 !important;
font-family: 'Fira Code','Consolas',monospace !important;
font-size: 12px !important; border: 1px solid #1a7f37 !important;
border-radius: 8px !important; line-height: 1.5;
}
#app-header { text-align:center; padding:20px 0 10px; }
#app-header h1 { color:#e6edf3; font-size:26px; font-weight:700; margin:0; }
#app-header p { color:#8b949e; font-size:13px; margin:4px 0 0; }
"""
# โ”€โ”€ ่ผ”ๅŠฉๅ‡ฝๅผ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
def _queue_current_text(runner: Dict) -> str:
s = runner.get("status", "idle")
t = runner.get("topic", "")
if s == "running":
return f"๐Ÿ”„ **็ ”็ฉถไธญ๏ผš** {t}\n\n> ็ ”็ฉถๅœจ่ƒŒๆ™ฏๅŸท่กŒ๏ผŒ้—œ้–‰้ ้ขๅพŒไปๆœƒ็นผ็บŒ๏ผŒๅฏๅ›žไพ†ๆŸฅ็œ‹็ตๆžœใ€‚"
elif s == "done" and t:
return f"โœ… **ๅฎŒๆˆ๏ผš** {t}"
elif s == "error" and t:
return f"โŒ **้Œฏ่ชค๏ผš** {t}"
return "โšช ๅพ…ๅ‘ฝไธญ๏ผˆ็„ก้€ฒ่กŒไธญ็š„็ ”็ฉถ๏ผ‰"
def _pending_choices(pending: List[str]) -> List[str]:
return [f"[{i+1}] {t}" for i, t in enumerate(pending)]
def _drain_log_queue() -> List[str]:
lines = []
while True:
try: lines.append(_LOG_QUEUE.get_nowait())
except queue.Empty: break
return lines
# โ”€โ”€ Tab 1 ๅ›žๅ‘ผ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
def cb_enqueue(topic: str):
msg = _queue.enqueue(topic)
full = _queue.get_full_state()
runner, pending = full["runner"], full["pending"]
return (
gr.update(value=f"๐Ÿ’ฌ {msg}"),
gr.update(value=_queue_current_text(runner)),
gr.update(choices=_pending_choices(pending), value=None),
gr.update(value=""),
)
def cb_delete_pending(selected_label: str):
if not selected_label:
full = _queue.get_full_state()
return (gr.update(value="โš ๏ธ ่ซ‹ๅ…ˆๅœจไธ‹ๆ‹‰้ธๅ–ฎไธญ้ธๅ–่ฆๅˆช้™ค็š„้ …็›ฎ"),
gr.update(value=_queue_current_text(full["runner"])),
gr.update(choices=_pending_choices(full["pending"]), value=None))
try:
idx = int(selected_label.split("]")[0].lstrip("[")) - 1
msg = _queue.remove(idx)
except (ValueError, IndexError):
msg = "โš ๏ธ ๅˆช้™คๅคฑๆ•—๏ผŒ่ซ‹้‡่ฉฆ"
full = _queue.get_full_state()
runner = full["runner"]
return (gr.update(value=f"๐Ÿ’ฌ {msg}"),
gr.update(value=_queue_current_text(runner)),
gr.update(choices=_pending_choices(full["pending"]), value=None))
def cb_poll():
full = _queue.get_full_state()
runner = full["runner"]
pending = full["pending"]
current_upd = gr.update(value=_queue_current_text(runner))
pending_upd = gr.update(choices=_pending_choices(pending), value=None)
if runner["status"] == "running":
return (current_upd, pending_upd,
gr.update(value=f"๐ŸŸข **็ ”็ฉถไธญ๏ผš{runner['topic']}** โ€” ่ซ‹็จๅ€™..."),
gr.update(),
gr.update(visible=False), gr.update(visible=False))
elif runner["status"] == "done" and runner.get("result"):
r = runner["result"]
stat = (f"โœ… **ๅฎŒๆˆ๏ผ** | ่€—ๆ™‚ {r.get('elapsed',0):.0f}s "
f"| ไพ†ๆบ {r.get('sources_count',0)} ็ฏ‡ "
f"| Token {r.get('total_tok',0):,}")
zip_p = runner.get("zip_path")
return (current_upd, pending_upd,
gr.update(value=stat),
gr.update(value=r.get("report","")),
gr.update(visible=bool(zip_p), value=zip_p),
gr.update(visible=True))
elif runner["status"] == "error":
err = (runner.get("result") or {}).get("error","ๆœช็Ÿฅ้Œฏ่ชค")
return (current_upd, pending_upd,
gr.update(value=f"๐Ÿ”ด **้Œฏ่ชค๏ผš** {err}"),
gr.update(),
gr.update(visible=False), gr.update(visible=False))
else:
return (current_upd, pending_upd,
gr.update(), gr.update(),
gr.update(visible=False), gr.update(visible=False))
def cb_load_latest():
index = _load_index()
if not index:
return (gr.update(value="โš ๏ธ ๅฐš็„กๆญทๅฒ็ด€้Œ„"), gr.update(),
gr.update(visible=False), gr.update(visible=False))
latest = index[0]
work_dir = latest.get("work_dir","")
rep_path = os.path.join(work_dir, "report_final.md") if work_dir else ""
if not rep_path or not os.path.exists(rep_path):
return (gr.update(value="โš ๏ธ ๆ‰พไธๅˆฐๆœ€ๆ–ฐๅ ฑๅ‘Šๆช”ๆกˆ๏ผŒ่ซ‹ๅ‰ๅพ€ใ€Œๆญทๅฒ็ด€้Œ„ใ€ๅˆ†้ "), gr.update(),
gr.update(visible=False), gr.update(visible=False))
with open(rep_path, 'r', encoding='utf-8') as f:
report_content = f.read()
topic = latest.get("topic","ๆœช็Ÿฅไธป้กŒ")
status_txt = (f"๐Ÿ“‚ **่ผ‰ๅ…ฅๆญทๅฒ๏ผš{topic}** | "
f"่€—ๆ™‚ {latest.get('elapsed',0):.0f}s | "
f"ไพ†ๆบ {latest.get('sources',0)} ็ฏ‡")
sid = latest.get("id","unknown")
topic_safe = re.sub(r'[^\w]','_',topic)[:20]
zip_path = make_zip([work_dir], f"{sid}_{topic_safe}")
return (gr.update(value=status_txt),
gr.update(value=report_content),
gr.update(visible=True, value=zip_path),
gr.update(visible=True))
def cb_clear_report():
return (gr.update(value="โšช ๅพ…ๅ‘ฝไธญ"),
gr.update(value=""),
gr.update(visible=False),
gr.update(visible=False))
# โ”€โ”€ Tab 3 ๅ›žๅ‘ผ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
_log_buffer: List[str] = []
def cb_refresh_log():
global _log_buffer
for line in _drain_log_queue():
if line == _SESSION_SENTINEL:
_log_buffer.clear()
else:
_log_buffer.append(line)
if len(_log_buffer) > 600:
_log_buffer = _log_buffer[-600:]
full = _queue.get_full_state()
runner = full["runner"]
pending_cnt = len(full["pending"])
badge_map = {"running":"๐ŸŸข ็ ”็ฉถไธญ","done":"๐Ÿ”ต ๅฎŒๆˆ","error":"๐Ÿ”ด ้Œฏ่ชค","idle":"โšช ๅพ…ๅ‘ฝ"}
badge = badge_map.get(runner["status"],"?")
status_txt = f"็ณป็ตฑ็‹€ๆ…‹๏ผš{badge}"
if pending_cnt > 0:
status_txt += f" | ็ญ‰ๅพ…ไฝ‡ๅˆ—๏ผš{pending_cnt} ๅ€‹"
return gr.update(value="\n".join(_log_buffer)), gr.update(value=status_txt)
def cb_clear_log():
global _log_buffer
_log_buffer.clear()
while True:
try: _LOG_QUEUE.get_nowait()
except queue.Empty: break
return gr.update(value="")
# =============================================================================
# Build the Gradio Blocks UI
# =============================================================================
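# Column schema for the history Dataframe; the header order must match the
# row tuples produced by cb_hist_load.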
_HIST_DF_HEADERS = ["ID", "ไธป้กŒ", "ๆ™‚้–“", "ไพ†ๆบๆ•ธ", "่€—ๆ™‚", "Token"]
_HIST_DF_TYPES = ["str", "str", "str", "number", "str", "str"]
with gr.Blocks(css=DARK_CSS, theme=gr.themes.Base(), title="AI ็ ”็ฉถ็ณป็ตฑ V7") as demo:
gr.HTML("""
<div id="app-header">
<h1>๐Ÿ”ฌ AI ่‡ชๅ‹•ๅŒ–็ ”็ฉถ็ณป็ตฑ V7</h1>
<p>็ ”็ฉถๅœจไผบๆœๅ™จ่ƒŒๆ™ฏๅŸท่กŒ ยท ้—œ้–‰้ ้ขๅพŒไป็นผ็บŒ ยท ๆญทๅฒ็ด€้Œ„ๅ…จ้ขๆ”น็”จ Gradio ๅŽŸ็”Ÿๅ…ƒไปถ๏ผˆ็ฉฉๅฎšๅฏ้ ๏ผ‰</p>
</div>
""")
with gr.Tabs():
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
# TAB 1 โ€” ็ ”็ฉถ + ไฝ‡ๅˆ—็ฎก็†
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
with gr.Tab("๐Ÿ” ็ ”็ฉถ", id="tab_research"):
with gr.Row():
with gr.Column(scale=5):
t1_topic = gr.Textbox(
label="็ ”็ฉถไธป้กŒ",
placeholder="่ผธๅ…ฅไธป้กŒๅพŒๆŒ‰ใ€ŒๅŠ ๅ…ฅไฝ‡ๅˆ—ใ€๏ผŒๅฏ้€ฃ็บŒๆŽ’็จ‹ๅคšๅ€‹ไธป้กŒ...",
lines=1,
)
with gr.Column(scale=1, min_width=130):
t1_enqueue_btn = gr.Button("โž• ๅŠ ๅ…ฅไฝ‡ๅˆ—", variant="primary", size="lg")
t1_msg = gr.Markdown(value="")
with gr.Row():
with gr.Column(scale=1, min_width=260):
gr.Markdown("#### ๐Ÿ“ก ็ ”็ฉถไฝ‡ๅˆ—")
t1_current_md = gr.Markdown(
value="โšช ๅพ…ๅ‘ฝไธญ๏ผˆ็„ก้€ฒ่กŒไธญ็š„็ ”็ฉถ๏ผ‰",
label="็›ฎๅ‰็‹€ๆ…‹",
)
t1_pending_dd = gr.Dropdown(
label="โณ ็ญ‰ๅพ…ไธญ็š„็ ”็ฉถ๏ผˆ้ธๅ–ๅพŒๅฏๅˆช้™ค๏ผ‰",
choices=[], value=None, interactive=True,
)
t1_del_btn = gr.Button("๐Ÿ—‘ ๅˆช้™ค้ธๅ–็š„็ญ‰ๅพ…้ …็›ฎ", variant="secondary", size="sm")
with gr.Column(scale=3):
t1_status_md = gr.Markdown(value="โšช ๅพ…ๅ‘ฝไธญ")
with gr.Row():
t1_load_latest_btn = gr.Button(
"๐Ÿ“‚ ่ผ‰ๅ…ฅๆœ€ๆ–ฐๆญทๅฒ็ตๆžœ", variant="secondary", size="sm"
)
t1_clear_btn = gr.Button(
"๐Ÿ”„ ๆธ…้™คๅ ฑๅ‘Šๅ€", variant="secondary", size="sm", visible=False
)
t1_download = gr.File(
label="๐Ÿ“ฆ ไธ‹่ผ‰ๆœฌๆฌก็ ”็ฉถๅŒ…๏ผˆZIP๏ผ‰",
visible=False, interactive=False,
)
t1_report = gr.Markdown(value="", label="็ ”็ฉถๅ ฑๅ‘Š", elem_classes=["prose"])
t1_timer = gr.Timer(value=3)
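# The Timer polls the background runner every 3 s; research runs in a
# server-side thread, so closing or reloading the page does not interrupt it.
# (Note: gr.Timer requires a fairly recent Gradio 4.x release.)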
_enqueue_outputs = [t1_msg, t1_current_md, t1_pending_dd, t1_topic]
t1_enqueue_btn.click(cb_enqueue, inputs=[t1_topic], outputs=_enqueue_outputs)
t1_topic.submit(cb_enqueue, inputs=[t1_topic], outputs=_enqueue_outputs)
t1_del_btn.click(
cb_delete_pending, inputs=[t1_pending_dd],
outputs=[t1_msg, t1_current_md, t1_pending_dd],
)
t1_timer.tick(
cb_poll,
outputs=[t1_current_md, t1_pending_dd,
t1_status_md, t1_report, t1_download, t1_clear_btn],
)
t1_load_latest_btn.click(
cb_load_latest,
outputs=[t1_status_md, t1_report, t1_download, t1_clear_btn],
)
t1_clear_btn.click(
cb_clear_report,
outputs=[t1_status_md, t1_report, t1_download, t1_clear_btn],
)
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
# TAB 2 โ€” ๆญทๅฒ็ด€้Œ„
# โ˜… ๅฎŒๅ…จไฝฟ็”จ Gradio ๅŽŸ็”Ÿๅ…ƒไปถ + Python ๅ›žๅ‘ผ โ˜…
# โ˜… ไธๅ†ไพ่ณด JS fetch / gr.HTML / FastAPI ่ทฏ็”ฑ โ˜…
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
with gr.Tab("๐Ÿ“š ๆญทๅฒ็ด€้Œ„", id="tab_history"):
# โ”€โ”€ ๆœๅฐ‹ๅˆ— โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
with gr.Row():
hist_search_input = gr.Textbox(
label="้—œ้ตๅญ—ๆœๅฐ‹",
placeholder="ไพ‹ๅฆ‚๏ผš้‡ๅญๅŠ›ๅญธ / anime / climate...",
lines=1, scale=4,
)
hist_search_btn = gr.Button("๐Ÿ”Ž ๆœๅฐ‹", variant="primary", scale=1, min_width=90)
hist_refresh_btn = gr.Button("๐Ÿ”„ ้‡ๆ–ฐๆ•ด็†", variant="secondary", scale=1, min_width=100)
hist_dl_all_btn = gr.Button("๐Ÿ“ฆ ไธ‹่ผ‰ๅ…จ้ƒจ", variant="secondary", scale=1, min_width=100)
hist_status_md = gr.Markdown(value="้ปžๆ“Šใ€Œ้‡ๆ–ฐๆ•ด็†ใ€่ผ‰ๅ…ฅๆญทๅฒ็ด€้Œ„")
# โ”€โ”€ ๆญทๅฒ่กจๆ ผ๏ผˆDataframe๏ผŒๅ”ฏ่ฎ€๏ผ‰ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
hist_df = gr.Dataframe(
headers=_HIST_DF_HEADERS,
datatype=_HIST_DF_TYPES,
value=[],
interactive=False,
wrap=False,
label="ๆญทๅฒ็ด€้Œ„ๅˆ—่กจ",
row_count=(8, "dynamic"),
col_count=(len(_HIST_DF_HEADERS), "fixed"),
)
gr.Markdown("---\n#### ๐Ÿ”Ž ้ธๆ“‡ไธ€็ญ†็ด€้Œ„้€ฒ่กŒ้ ่ฆฝ / ไธ‹่ผ‰")
# โ”€โ”€ ้ธๅ– + ๆ“ไฝœ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
with gr.Row():
hist_select_dd = gr.Dropdown(
label="้ธๆ“‡็ด€้Œ„๏ผˆID + ไธป้กŒ๏ผ‰",
choices=[], value=None, interactive=True, scale=4,
)
hist_preview_btn = gr.Button("๐Ÿ‘ ้ ่ฆฝๅ ฑๅ‘Š", variant="primary", scale=1, min_width=100)
hist_dl_one_btn = gr.Button("โฌ‡ ไธ‹่ผ‰ ZIP", variant="secondary", scale=1, min_width=100)
# โ”€โ”€ ไธ‹่ผ‰่ผธๅ‡บ๏ผˆgr.File๏ผŒ็”ฑ Python callback ๅกซๅ…ฅ๏ผ‰ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
hist_dl_file = gr.File(
label="๐Ÿ“ฆ ZIP ไธ‹่ผ‰๏ผˆ้ปžๆ“Šๅณไธ‹่ผ‰๏ผ‰",
visible=False, interactive=False,
)
# โ”€โ”€ ๅ ฑๅ‘Š้ ่ฆฝ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
hist_preview_md = gr.Markdown(
value="",
label="ๅ ฑๅ‘Š้ ่ฆฝ",
elem_classes=["prose"],
)
# โ”€โ”€ ไบ‹ไปถ็ถๅฎš โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
_hist_load_outputs = [hist_df, hist_select_dd, hist_status_md,
hist_preview_md, hist_dl_file]
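# cb_hist_load must return a 5-tuple in exactly this order.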
hist_refresh_btn.click(
fn=lambda: cb_hist_load(""),
outputs=_hist_load_outputs,
)
hist_search_btn.click(
fn=cb_hist_load,
inputs=[hist_search_input],
outputs=_hist_load_outputs,
)
hist_search_input.submit(
fn=cb_hist_load,
inputs=[hist_search_input],
outputs=_hist_load_outputs,
)
hist_preview_btn.click(
fn=cb_hist_preview,
inputs=[hist_select_dd],
outputs=[hist_preview_md, hist_dl_file],
)
hist_dl_one_btn.click(
fn=cb_hist_download_one,
inputs=[hist_select_dd],
outputs=[hist_dl_file],
)
hist_dl_all_btn.click(
fn=cb_hist_download_all,
outputs=[hist_dl_file],
)
# ๅˆ‡ๆ›ๅˆฐๆญค Tab ๆ™‚่‡ชๅ‹•ๅˆทๆ–ฐ
demo.load(
fn=lambda: cb_hist_load(""),
outputs=_hist_load_outputs,
)
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
# TAB 3 โ€” ๅŸท่กŒๆ—ฅ่ชŒ
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
with gr.Tab("๐Ÿ“ก ๅŸท่กŒๆ—ฅ่ชŒ", id="tab_log"):
with gr.Row():
t3_status_label = gr.Markdown(value="็ณป็ตฑ็‹€ๆ…‹๏ผšโšช ๅพ…ๅ‘ฝ")
t3_clear_btn = gr.Button("๐Ÿ—‘ ๆธ…้™คๆ—ฅ่ชŒ", variant="secondary", size="sm")
t3_log = gr.Textbox(
label="ๅณๆ™‚ๆ—ฅ่ชŒ๏ผˆๆฏๆฌกๆ–ฐ็ ”็ฉถ้–‹ๅง‹่‡ชๅ‹•ๆธ…้™ค่ˆŠ session๏ผ‰",
lines=30, max_lines=30, interactive=False,
elem_id="log_box", autoscroll=True,
)
gr.HTML("""<script>
setInterval(function(){
var ta = document.querySelector('#log_box textarea');
if(ta) ta.scrollTop = ta.scrollHeight;
}, 2200);
</script>""")
t3_timer = gr.Timer(value=2)
t3_timer.tick(cb_refresh_log, outputs=[t3_log, t3_status_label])
t3_clear_btn.click(cb_clear_log, outputs=[t3_log])
# =============================================================================
# 16. Launch (FastAPI fallback routes kept, but the UI no longer depends on them)
# =============================================================================
from fastapi import FastAPI
from fastapi.responses import JSONResponse, FileResponse, Response
from fastapi import Query as FastQuery
fastapi_app = FastAPI(title="AI Research API")
@fastapi_app.get("/api/history")
async def api_get_history(q: str = FastQuery(default="")):
try:
entries = fuzzy_search_history(q) if q.strip() else _load_index()
slim = [{"id":e.get("id",""),"topic":e.get("topic",""),
"timestamp":e.get("timestamp","")[:16].replace("T"," "),
"sources":e.get("sources",0),"elapsed":round(e.get("elapsed",0)),
"tok":e.get("tok",0)} for e in entries]
return JSONResponse(slim)
except Exception as e:
return JSONResponse({"error": str(e)}, status_code=500)
@fastapi_app.get("/api/download/{entry_id}")
async def api_download_entry(entry_id: str):
try:
index = _load_index()
entry = next((e for e in index if e.get("id") == entry_id), None)
if not entry: return Response("ๆ‰พไธๅˆฐๆญค็ญ†็ด€้Œ„", status_code=404)
wd = entry.get("work_dir","")
if not wd or not os.path.isdir(wd): return Response("ๅทฅไฝœ็›ฎ้Œ„ไธๅญ˜ๅœจ", status_code=404)
topic_safe = re.sub(r'[^\w]','_', entry.get("topic","unknown"))[:20]
zip_path = make_zip([wd], f"{entry_id}_{topic_safe}")
fname = f"{entry_id}_{topic_safe}.zip"
return FileResponse(zip_path, media_type="application/zip", filename=fname,
headers={"Content-Disposition": f'attachment; filename="{fname}"'})
except Exception as e:
return Response(f"ไธ‹่ผ‰ๅคฑๆ•—๏ผš{e}", status_code=500)
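# gradio is already imported earlier in the file (gr is used throughout the
# Blocks above). Mount the Blocks UI at the root path; because the /api/*
# routes were registered on fastapi_app before mounting, they take
# precedence over the Gradio catch-all mount.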
mounted_app = gr.mount_gradio_app(fastapi_app, demo, path="/")
if __name__ == "__main__":
import uvicorn
_ensure_history_dir()
os.makedirs("./sessions", exist_ok=True)
os.makedirs("./zips", exist_ok=True)
if not DDGS_AVAILABLE: print("โš ๏ธ ddgs ๅฅ—ไปถๆœชๅฎ‰่ฃ๏ผŒๆœๅฐ‹ๅŠŸ่ƒฝๅ—้™")
if not GRADIO_AVAILABLE: print("โš ๏ธ gradio_client ๆœชๅฎ‰่ฃ๏ผŒLLM ๅ‘ผๅซๅ—้™")
print("๐Ÿš€ ๅ•Ÿๅ‹•ไธญ๏ผšhttp://0.0.0.0:7860")
uvicorn.run(mounted_app, host="0.0.0.0", port=7860, log_level="warning")