Spaces:
Sleeping
Sleeping
Update tools_v2.py
Browse files- tools_v2.py +132 -464
tools_v2.py
CHANGED
|
@@ -1,399 +1,157 @@
|
|
| 1 |
-
"""
|
| 2 |
-
tools_v2.py - SPECTER2 + HDBSCAN + UMAP thematic analysis tools.
|
| 3 |
-
COMPLETELY INDEPENDENT from tools.py (v1). No shared state, no ordering dependency.
|
| 4 |
-
V2 can be run before, after, or without ever running V1.
|
| 5 |
-
|
| 6 |
-
SPECTER2 is allenai/specter2_base - a local HuggingFace model.
|
| 7 |
-
NO API KEY required. Downloads once, cached automatically.
|
| 8 |
-
|
| 9 |
-
Pipeline:
|
| 10 |
-
1. Combined Title+Abstract per paper -> SPECTER2 embedding (768-dim)
|
| 11 |
-
2. UMAP (cosine, 5D) -> tight document clusters
|
| 12 |
-
3. HDBSCAN -> 15-30 clusters, 5-120 papers each
|
| 13 |
-
4. Council-of-3-LLMs -> 3 Mistral-small expert personas -> mode vote
|
| 14 |
-
5. PAJAIS mapping + audit CSV + narrative
|
| 15 |
-
"""
|
| 16 |
-
|
| 17 |
-
from __future__ import annotations
|
| 18 |
-
|
| 19 |
-
import json
|
| 20 |
-
import io
|
| 21 |
-
from pathlib import Path
|
| 22 |
-
|
| 23 |
-
import numpy as np
|
| 24 |
-
import pandas as pd
|
| 25 |
-
import plotly.express as px
|
| 26 |
-
from langchain_core.tools import tool
|
| 27 |
-
from langchain_core.messages import HumanMessage
|
| 28 |
-
from langchain_mistralai import ChatMistralAI
|
| 29 |
-
|
| 30 |
-
# Root folder for every artefact this module writes; created at import time.
DATA_DIR = Path("data")
DATA_DIR.mkdir(exist_ok=True)

# Candidate PAJAIS categories offered to the LLM when mapping clusters.
PAJAIS_CATEGORIES = [
    "Information Systems Theory",
    "IS Strategy & Governance",
    "Digital Innovation",
    "Enterprise Systems",
    "AI & Intelligent Systems",
    "Big Data & Analytics",
    "Cybersecurity & Privacy",
    "Cloud Computing",
    "IS in Healthcare",
    "IS in Education",
    "E-Commerce & Digital Markets",
    "Social Media & Platforms",
    "Human-Computer Interaction",
    "IS Project Management",
    "IT Outsourcing",
    "Knowledge Management",
    "IS Development Methodologies",
    "Digital Transformation",
    "IS Ethics & Society",
    "IS in Developing Countries",
    "Mobile Computing",
    "IT Infrastructure",
    "IS Adoption & Diffusion",
    "IS Evaluation",
    "Organizational IS & Change",
]

# -- lazy-loaded models: initialised once on first call --
_SPECTER_TOKENIZER = None
_SPECTER_MODEL_OBJ = None
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
def _get_specter():
    """Return the ``(tokenizer, model)`` pair, loading it only if not cached.

    The pair is read from the module-level cache variables; when either one
    is still ``None`` the work is delegated to ``_load_specter_fresh``.
    """
    already_loaded = (
        _SPECTER_TOKENIZER is not None and _SPECTER_MODEL_OBJ is not None
    )
    if already_loaded:
        return _SPECTER_TOKENIZER, _SPECTER_MODEL_OBJ
    return _load_specter_fresh()
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
def _load_specter_fresh():
    """Load the SPECTER2 tokenizer and model and cache them in module globals.

    ``transformers`` is imported here rather than at module import time.
    The model is switched to eval mode before it is returned.

    Returns:
        Tuple ``(tokenizer, model)`` - the same objects stored in the
        module-level cache variables.
    """
    global _SPECTER_TOKENIZER, _SPECTER_MODEL_OBJ
    from transformers import AutoModel, AutoTokenizer

    checkpoint = "allenai/specter2_base"
    print("Loading SPECTER2 (allenai/specter2_base) β one-time HuggingFace download, then cached...")
    _SPECTER_TOKENIZER = AutoTokenizer.from_pretrained(checkpoint)
    _SPECTER_MODEL_OBJ = AutoModel.from_pretrained(checkpoint)
    _SPECTER_MODEL_OBJ.eval()
    print("SPECTER2 loaded OK.")
    return _SPECTER_TOKENIZER, _SPECTER_MODEL_OBJ
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
def _embed_specter(texts: list, batch_size: int = 8) -> np.ndarray:
    """Embed ``texts`` with SPECTER2 and return L2-normalised vectors.

    Each batch is tokenised (padded, truncated to 512 tokens), run through
    the model without gradients, and the first-token (CLS) hidden state is
    taken as the document embedding.

    Args:
        texts: Strings to embed.
        batch_size: Number of texts per forward pass (default 8, the value
            that used to be hard-coded).

    Returns:
        Array with one unit-length row per input text. For empty input an
        empty ``(0, 0)`` float32 array is returned without loading the model.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if len(texts) == 0:
        # np.vstack([]) would raise an opaque "need at least one array" error.
        return np.empty((0, 0), dtype=np.float32)

    import torch

    tokenizer, model = _get_specter()
    normalised_batches = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(
            batch,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt",
        )
        with torch.no_grad():
            out = model(**inputs)
        emb = out.last_hidden_state[:, 0, :].numpy()  # CLS token
        norms = np.linalg.norm(emb, axis=1, keepdims=True)
        # Floor the norm so an all-zero vector cannot divide by zero.
        normalised_batches.append(emb / np.maximum(norms, 1e-9))
    return np.vstack(normalised_batches)
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
def _p2() -> dict:
    """Return every V2 output path, creating ``DATA_DIR / "v2"`` if needed.

    All entries live inside the ``v2`` sub-folder except ``comparison``,
    which sits directly in ``DATA_DIR``.
    """
    root = DATA_DIR / "v2"
    root.mkdir(parents=True, exist_ok=True)

    filenames = {
        "papers": "papers.json",
        "embeddings": "embeddings.npy",
        "umap_emb": "umap_emb.npy",
        "clusters": "clusters.json",
        "summaries": "summaries.json",
        "taxonomy": "taxonomy.json",
        "charts": "charts.json",
        "audit_csv": "cluster_audit.csv",
        "narrative": "narrative_v2.txt",
    }

    paths = {"dir": root}
    for key, filename in filenames.items():
        paths[key] = root / filename
    paths["comparison"] = DATA_DIR / "comparison_v2.csv"
    return paths
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
def _read_csv_robust(path) -> pd.DataFrame:
    """Read a CSV whose text encoding is unknown.

    The bytes are decoded strictly, trying ``utf-8-sig`` first (plain UTF-8
    with or without a BOM), then ``cp1252``, and finally ``latin-1``, which
    accepts every byte sequence. Decoding with ``errors="replace"`` is
    deliberately avoided: it never fails, so the first encoding would always
    win and non-UTF-8 characters would be silently replaced with U+FFFD.

    Args:
        path: Location of the CSV file (string or ``Path``).

    Returns:
        The parsed DataFrame.
    """
    raw = Path(path).read_bytes()
    for encoding in ("utf-8-sig", "cp1252"):
        try:
            decoded = raw.decode(encoding)
        except UnicodeDecodeError:
            continue
        break
    else:
        # latin-1 maps every byte to a code point, so this cannot raise.
        decoded = raw.decode("latin-1")
    return pd.read_csv(io.StringIO(decoded))
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
def _call_llm_json(llm, prompt: str):
    """Send ``prompt`` to ``llm`` and parse the reply as JSON.

    The prompt is wrapped in a single ``HumanMessage``. If the reply is
    wrapped in a markdown code fence, only the fenced payload is parsed:
    a fenced ``json`` block is preferred (the last one, as before), otherwise
    the contents of the first plain fence are used. Previously a plain fence
    left only the text *before* the fence, which is normally empty.

    Args:
        llm: Chat model exposing ``invoke(messages)`` whose result has a
            string ``content`` attribute.
        prompt: Text sent as the human message.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the extracted text is not valid JSON.
    """
    response = llm.invoke([HumanMessage(content=prompt)])
    raw = response.content.strip()
    if "```json" in raw:
        raw = raw.split("```json")[-1].split("```")[0].strip()
    elif "```" in raw:
        # Index 1 is the text right after the opening fence; it also covers
        # a reply whose closing fence is missing.
        raw = raw.split("```")[1].strip()
    return json.loads(raw)
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
def _mode_label(labels: list) -> str:
    """Return the most frequent entry of ``labels``.

    When several entries share the highest count, the one that appears
    first in ``labels`` wins.

    Args:
        labels: Non-empty list of hashable labels.

    Raises:
        IndexError: If ``labels`` is empty (same exception type as before,
            now with an explanatory message).
    """
    from collections import Counter

    if not labels:
        raise IndexError("cannot take the mode of an empty label list")
    winner, _count = Counter(labels).most_common(1)[0]
    return winner
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
# =============================================================================
|
| 133 |
-
# V2 TOOL 1 - load_and_embed_specter2
|
| 134 |
-
# =============================================================================
|
| 135 |
-
@tool
def load_and_embed_specter2(csv_path: str = "data/uploaded.csv") -> str:
    """Load Scopus CSV, build one combined Title+Abstract text per paper, embed with SPECTER2.

    SPECTER2 (allenai/specter2_base) is a LOCAL HuggingFace model - NO API key needed.
    Papers whose combined text has five words or fewer are dropped.
    Output saved to data/v2/ only - completely independent of Classic (v1) run.

    Args:
        csv_path: Path to uploaded Scopus CSV.

    Returns:
        JSON string with total_papers, valid_papers, embedding_dim and a note.
    """
    paths = _p2()
    frame = _read_csv_robust(csv_path)
    row_count = len(frame)

    exact_names = {name.strip().lower(): name for name in frame.columns}

    def pick_column(keyword, use_exact=True):
        # Prefer a header equal to the keyword, else the first header containing it.
        if use_exact and keyword in exact_names:
            return exact_names[keyword]
        return next(
            (name for name in frame.columns if keyword in name.lower()), None
        )

    def cell_values(column):
        # A missing column becomes empty strings so positions stay aligned.
        if column:
            return list(frame[column].fillna(""))
        return [""] * row_count

    titles = cell_values(pick_column("title"))
    abstracts = cell_values(pick_column("abstract"))
    dois = cell_values(pick_column("doi"))
    years = cell_values(pick_column("year"))
    journals = cell_values(pick_column("source", use_exact=False))

    combined = [
        "{} {}".format(str(title).strip(), str(abstract).strip()).strip()
        for title, abstract in zip(titles, abstracts)
    ]

    papers = []
    valid_texts = []
    for idx, text in enumerate(combined):
        if len(text.split()) <= 5:
            continue
        papers.append({
            "paper_idx": idx,
            "title": titles[idx],
            "abstract": abstracts[idx],
            "doi": dois[idx],
            "year": str(years[idx]),
            "journal": str(journals[idx]),
            "combined": text,
        })
        valid_texts.append(text)

    paths["papers"].write_text(json.dumps(papers, indent=2, ensure_ascii=False))

    print("Embedding {} papers with SPECTER2...".format(len(valid_texts)))
    embs = _embed_specter(valid_texts)
    np.save(paths["embeddings"], embs)

    summary = {
        "total_papers": row_count,
        "valid_papers": len(papers),
        "embedding_dim": int(embs.shape[1]),
        "note": "SPECTER2 embeddings saved to data/v2/. No API key needed.",
    }
    return json.dumps(summary)
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
# =============================================================================
|
| 191 |
-
# V2 TOOL 2 - cluster_with_umap_hdbscan
|
| 192 |
-
# =============================================================================
|
| 193 |
-
@tool
def cluster_with_umap_hdbscan(
    umap_neighbors: int = 15,
    umap_min_dist: float = 0.05,
    hdbscan_min_cluster_size: int = 5,
    hdbscan_min_samples: int = 3,
) -> str:
    """Reduce SPECTER2 embeddings with UMAP (cosine) then cluster with HDBSCAN.

    Targets 15-30 clusters; only clusters with 5-120 papers are kept and they
    are renumbered from 1 by descending size. Saves results + charts to data/v2/.

    Args:
        umap_neighbors: UMAP n_neighbors (default 15).
        umap_min_dist: UMAP min_dist (default 0.05 for tight clusters).
        hdbscan_min_cluster_size: Min papers per cluster (default 5).
        hdbscan_min_samples: HDBSCAN core-point threshold (default 3).

    Returns:
        JSON string with the cluster count, noise count, sizes and a note.
    """
    import umap as umap_mod
    import hdbscan as hdbscan_mod

    paths = _p2()
    embs = np.load(paths["embeddings"])
    papers = json.loads(paths["papers"].read_text())

    # 5-D reduction that HDBSCAN clusters on.
    print("UMAP fit (n_neighbors={}, min_dist={})...".format(umap_neighbors, umap_min_dist))
    reducer = umap_mod.UMAP(
        n_components=5,
        n_neighbors=umap_neighbors,
        min_dist=umap_min_dist,
        metric="cosine",
        random_state=42,
        verbose=False,
    )
    umap_embs = reducer.fit_transform(embs)
    np.save(paths["umap_emb"], umap_embs)

    print("HDBSCAN fit (min_cluster_size={})...".format(hdbscan_min_cluster_size))
    clusterer = hdbscan_mod.HDBSCAN(
        min_cluster_size=hdbscan_min_cluster_size,
        min_samples=hdbscan_min_samples,
        metric="euclidean",
        cluster_selection_method="eom",
        prediction_data=True,
    )
    labels = clusterer.fit_predict(umap_embs)
    probs = clusterer.probabilities_
    noise = int((labels == -1).sum())  # label -1 marks noise points
    unique = sorted(set(labels.tolist()) - {-1})
    print("Raw clusters: {}, noise: {}".format(len(unique), noise))

    all_clusters = []
    for seq_id, raw_cid in enumerate(unique):
        mask = labels == raw_cid
        member_rows = np.flatnonzero(mask).tolist()
        cluster_papers = [papers[row] for row in member_rows]
        cluster_embs = embs[mask]

        # Cosine similarity of each member to the cluster centroid.
        centroid = cluster_embs.mean(axis=0)
        unit_centroid = centroid / max(float(np.linalg.norm(centroid)), 1e-9)
        member_norms = np.linalg.norm(cluster_embs, axis=1, keepdims=True)
        unit_members = cluster_embs / np.maximum(member_norms, 1e-9)
        sims = (unit_members @ unit_centroid).tolist()

        # Positions (within the cluster) of the three papers nearest the centroid.
        top3 = sorted(range(len(sims)), key=lambda pos: -sims[pos])[:3]

        all_clusters.append({
            "cluster_id": seq_id + 1,
            "paper_count": int(mask.sum()),
            "papers": cluster_papers,
            "hdbscan_probs": probs[mask].tolist(),
            "centroid_sims": sims,
            "centroid": centroid.tolist(),
            "top3_paper_idx": top3,
            "top3_titles": [cluster_papers[pos]["title"] for pos in top3],
            "top3_abstracts": [cluster_papers[pos]["abstract"][:200] for pos in top3],
        })

    in_range = [c for c in all_clusters if 5 <= c["paper_count"] <= 120]
    in_range.sort(key=lambda c: -c["paper_count"])
    valid = [dict(c, cluster_id=rank + 1) for rank, c in enumerate(in_range)]

    # 2-D UMAP for scatter chart
    projector = umap_mod.UMAP(
        n_components=2,
        n_neighbors=umap_neighbors,
        min_dist=umap_min_dist,
        metric="cosine",
        random_state=42,
        verbose=False,
    )
    umap_2d = projector.fit_transform(embs)
    scatter_frame = pd.DataFrame({
        "x": umap_2d[:, 0].tolist(),
        "y": umap_2d[:, 1].tolist(),
        "cluster": [str(label) for label in labels.tolist()],
        "title": [paper["title"][:50] for paper in papers],
        "prob": probs.tolist(),
    })
    fig_scatter = px.scatter(
        scatter_frame,
        x="x",
        y="y",
        color="cluster",
        hover_data=["title", "prob"],
        title="UMAP+HDBSCAN β {} clusters, {} noise".format(len(valid), noise),
    )
    sizes = [c["paper_count"] for c in valid]
    fig_bar = px.bar(
        x=["C{}".format(c["cluster_id"]) for c in valid],
        y=sizes,
        title="Papers per Cluster",
    )
    charts = {
        "scatter": fig_scatter.to_html(full_html=False, include_plotlyjs="cdn"),
        "bar": fig_bar.to_html(full_html=False, include_plotlyjs=False),
    }
    paths["charts"].write_text(json.dumps(charts))
    paths["clusters"].write_text(json.dumps(valid, indent=2, ensure_ascii=False))

    report = {
        "clusters_found": len(valid),
        "noise_papers": noise,
        "total_papers": len(papers),
        "cluster_sizes": sizes,
        "within_15_30": 15 <= len(valid) <= 30,
        "note": "{} clusters (5-120 papers each). Ready for council-of-3 labeling.".format(len(valid)),
    }
    return json.dumps(report)
|
| 301 |
-
|
| 302 |
-
|
| 303 |
# =============================================================================
|
| 304 |
-
# V2 TOOL 3 - label_clusters_council_of_3
|
| 305 |
# =============================================================================
|
| 306 |
@tool
|
| 307 |
def label_clusters_council_of_3(batch_size: int = 5) -> str:
|
| 308 |
-
"""Label each cluster using a council of 3
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
Final label = mode (most common) of the 3 responses.
|
| 310 |
Vote agreement = unanimous / majority / split.
|
| 311 |
Saves enriched summaries + full audit CSV (one row per paper) to data/v2/.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
Args:
|
| 313 |
batch_size: Clusters per LLM call (default 5).
|
| 314 |
"""
|
| 315 |
import time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
p = _p2()
|
| 317 |
clusters = json.loads(p["clusters"].read_text())
|
| 318 |
|
| 319 |
-
|
|
|
|
| 320 |
{
|
| 321 |
-
"name":
|
| 322 |
-
"
|
| 323 |
-
"
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
"(e.g. 'Enterprise Resource Planning Adoption Barriers', "
|
| 327 |
-
"'IS Governance Frameworks Healthcare')."
|
| 328 |
),
|
| 329 |
},
|
| 330 |
{
|
| 331 |
-
"name":
|
| 332 |
-
"
|
| 333 |
-
"
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
"(e.g. 'Organisational Change Through Digital Platforms', "
|
| 337 |
-
"'Strategic IT-Business Alignment Mechanisms')."
|
| 338 |
),
|
| 339 |
},
|
| 340 |
{
|
| 341 |
-
"name":
|
| 342 |
-
"
|
| 343 |
-
"
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
"'Cloud Infrastructure Scalability Patterns')."
|
| 347 |
),
|
| 348 |
},
|
| 349 |
]
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
def make_prompt(
|
| 354 |
-
mini = [
|
| 355 |
-
|
| 356 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
return (
|
| 358 |
-
|
| 359 |
-
"Label each cluster
|
|
|
|
|
|
|
| 360 |
"CLUSTERS:\n" + json.dumps(mini, indent=2) + "\n\n"
|
| 361 |
"Return ONLY a raw JSON array β no markdown, no preamble.\n"
|
| 362 |
"Each element: cluster_id (int), label (4-7 words), "
|
| 363 |
"confidence (High/Medium/Low), reasoning (one sentence)."
|
| 364 |
)
|
|
|
|
| 365 |
|
|
|
|
|
|
|
|
|
|
| 366 |
persona_results = [{}, {}, {}]
|
| 367 |
-
batch_starts
|
| 368 |
|
| 369 |
-
for pi,
|
|
|
|
|
|
|
| 370 |
all_labels = []
|
|
|
|
|
|
|
|
|
|
| 371 |
for bi, start in enumerate(batch_starts):
|
| 372 |
batch = clusters[start: start + batch_size]
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
for item in all_labels:
|
| 377 |
cid = int(item.get("cluster_id", 0))
|
| 378 |
persona_results[pi][cid] = item
|
| 379 |
-
_ = time.sleep(15) if pi < len(PERSONAS) - 1 else None
|
| 380 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 381 |
def enrich(cluster):
|
| 382 |
cid = cluster["cluster_id"]
|
| 383 |
-
raw_votes = [
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
return {
|
| 392 |
**cluster,
|
| 393 |
"label": final,
|
| 394 |
-
"
|
| 395 |
-
"
|
| 396 |
-
"
|
| 397 |
"confidence_1": persona_results[0].get(cid, {}).get("confidence", ""),
|
| 398 |
"confidence_2": persona_results[1].get(cid, {}).get("confidence", ""),
|
| 399 |
"confidence_3": persona_results[2].get(cid, {}).get("confidence", ""),
|
|
@@ -412,29 +170,29 @@ def label_clusters_council_of_3(batch_size: int = 5) -> str:
|
|
| 412 |
cid = c["cluster_id"]
|
| 413 |
for li, paper in enumerate(c["papers"]):
|
| 414 |
rows.append({
|
| 415 |
-
"cluster_id":
|
| 416 |
-
"final_label":
|
| 417 |
-
"vote_agreement":
|
| 418 |
-
"
|
| 419 |
-
"
|
| 420 |
-
"
|
| 421 |
-
"llm1_confidence":
|
| 422 |
-
"llm2_confidence":
|
| 423 |
-
"llm3_confidence":
|
| 424 |
-
"llm1_reasoning":
|
| 425 |
-
"llm2_reasoning":
|
| 426 |
-
"llm3_reasoning":
|
| 427 |
-
"paper_doi":
|
| 428 |
-
"paper_title":
|
| 429 |
-
"paper_year":
|
| 430 |
-
"paper_journal":
|
| 431 |
-
"abstract_preview":
|
| 432 |
-
"combined_preview":
|
| 433 |
-
"centroid_cosine_sim":
|
| 434 |
c["centroid_sims"][li] if li < len(c["centroid_sims"]) else 0.0), 4),
|
| 435 |
-
"hdbscan_probability":
|
| 436 |
c["hdbscan_probs"][li] if li < len(c["hdbscan_probs"]) else 0.0), 4),
|
| 437 |
-
"is_top3_centroid":
|
| 438 |
})
|
| 439 |
|
| 440 |
pd.DataFrame(rows).to_csv(p["audit_csv"], index=False, encoding="utf-8-sig")
|
|
@@ -447,96 +205,6 @@ def label_clusters_council_of_3(batch_size: int = 5) -> str:
|
|
| 447 |
"majority": majority,
|
| 448 |
"split": len(enriched) - unanimous - majority,
|
| 449 |
"audit_csv_rows": len(rows),
|
| 450 |
-
"
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
# =============================================================================
|
| 455 |
-
# V2 TOOL 4 - map_clusters_to_pajais_v2
|
| 456 |
-
# =============================================================================
|
| 457 |
-
@tool
def map_clusters_to_pajais_v2() -> str:
    """Map v2 cluster labels to PAJAIS 25 IS research categories via Mistral LLM.

    Clusters are sent to the model ten at a time with a 10-second pause
    between calls. Saves taxonomy to data/v2/taxonomy.json. Independent of
    v1 taxonomy.

    Returns:
        JSON string with the number of mapped clusters and a note.
    """
    import time

    paths = _p2()
    summaries = json.loads(paths["summaries"].read_text())
    llm = ChatMistralAI(model="mistral-small-latest", temperature=0.1)

    compact = [
        {
            "cluster_id": summary["cluster_id"],
            "name": summary["label"],
            "sample": summary["top3_titles"][:2],
        }
        for summary in summaries
    ]

    chunk_size = 10
    offsets = list(range(0, len(compact), chunk_size))
    last_position = len(offsets) - 1
    categories_json = json.dumps(PAJAIS_CATEGORIES, indent=2)
    results = []

    for position, offset in enumerate(offsets):
        chunk = compact[offset:offset + chunk_size]
        prompt = "".join([
            "Map each IS research cluster to the single most relevant PAJAIS category.\n\n",
            "CLUSTERS:\n",
            json.dumps(chunk, indent=2),
            "\n\n",
            "PAJAIS CATEGORIES:\n",
            categories_json,
            "\n\n",
            "Return ONLY a raw JSON array. Each element: ",
            "cluster_id (int), name (str), pajais_category (str), ",
            "confidence (High/Medium/Low), rationale (one sentence). ",
            "No markdown.",
        ])
        results.extend(_call_llm_json(llm, prompt))
        # Pause between calls, but not after the final one.
        if position < last_position:
            time.sleep(10)

    paths["taxonomy"].write_text(json.dumps(results, indent=2, ensure_ascii=False))
    return json.dumps({
        "mapped_clusters": len(results),
        "note": "PAJAIS taxonomy saved to data/v2/taxonomy.json",
    })
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
# =============================================================================
|
| 493 |
-
# V2 TOOL 5 - export_v2_outputs
|
| 494 |
-
# =============================================================================
|
| 495 |
-
@tool
def export_v2_outputs() -> str:
    """Generate final comparison_v2.csv and narrative_v2.txt for the SPECTER2 run.

    comparison_v2.csv: enriched audit CSV with PAJAIS column added, written
    to data/comparison_v2.csv.
    narrative_v2.txt: ~500-word Section 7 academic discussion requested from
    the Mistral LLM, written to data/v2/narrative_v2.txt.

    Returns:
        JSON string with row count, word count, both output paths and a note.
    """
    paths = _p2()
    summaries = json.loads(paths["summaries"].read_text())
    taxonomy = json.loads(paths["taxonomy"].read_text())

    # cluster id (as text) -> PAJAIS category chosen by the mapping step.
    category_by_cluster = {}
    for entry in taxonomy:
        category_by_cluster[str(entry.get("cluster_id", ""))] = entry.get(
            "pajais_category", "Unknown"
        )

    audit_df = pd.read_csv(paths["audit_csv"], encoding="utf-8-sig")
    # Normalise ids such as 3, "3" or 3.0 to "3" before the lookup.
    audit_df["pajais_category"] = [
        category_by_cluster.get(str(int(float(str(cluster_id)))), "Unknown")
        for cluster_id in audit_df["cluster_id"]
    ]
    out_path = paths["comparison"]
    audit_df.to_csv(out_path, index=False, encoding="utf-8-sig")

    llm = ChatMistralAI(model="mistral-small-latest", temperature=0.4)
    cluster_summary = [
        {
            "cluster": summary["cluster_id"],
            "label": summary["label"],
            "papers": summary["paper_count"],
            "agreement": summary["vote_agreement"],
        }
        for summary in summaries
    ]

    prompt = "".join([
        "Write Section 7 (Discussion and Thematic Synthesis) for a systematic ",
        "IS literature review. ~500 words, formal academic prose.\n",
        "Method: SPECTER2 document embeddings + UMAP + HDBSCAN + council-of-3-LLMs labeling.\n",
        "Cover: (a) overview of clusters/themes, (b) dominant PAJAIS categories, ",
        "(c) inter-cluster relationships, (d) implications for IS research, ",
        "(e) methodological contribution vs traditional BERTopic, (f) limitations.\n\n",
        "CLUSTERS:\n",
        json.dumps(cluster_summary, indent=2),
        "\n\n",
        "PAJAIS MAPPING:\n",
        json.dumps(taxonomy, indent=2),
        "\n\n",
        "Continuous academic paragraphs only. No bullet points or headers.",
    ])
    narrative = llm.invoke([HumanMessage(content=prompt)]).content
    paths["narrative"].write_text(narrative, encoding="utf-8")

    report = {
        "comparison_csv_rows": len(audit_df),
        "comparison_csv_path": str(out_path),
        "narrative_words": len(narrative.split()),
        "narrative_path": str(paths["narrative"]),
        "note": "comparison_v2.csv + narrative_v2.txt ready in Download tab.",
    }
    return json.dumps(report)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# =============================================================================
|
| 2 |
+
# V2 TOOL 3 - label_clusters_council_of_3 (TRUE multi-LLM ensemble)
|
| 3 |
# =============================================================================
|
| 4 |
@tool
|
| 5 |
def label_clusters_council_of_3(batch_size: int = 5) -> str:
|
| 6 |
+
"""Label each cluster using a TRUE council of 3 DIFFERENT LLMs:
|
| 7 |
+
1. Mistral (mistral-small-latest)
|
| 8 |
+
2. OpenAI (gpt-4o-mini)
|
| 9 |
+
3. Groq (llama3-70b-8192)
|
| 10 |
+
Each model receives the SAME prompt independently.
|
| 11 |
Final label = mode (most common) of the 3 responses.
|
| 12 |
Vote agreement = unanimous / majority / split.
|
| 13 |
Saves enriched summaries + full audit CSV (one row per paper) to data/v2/.
|
| 14 |
+
|
| 15 |
+
API keys are read automatically from environment variables:
|
| 16 |
+
MISTRAL_API_KEY, OPENAI_API_KEY, GROQ_API_KEY
|
| 17 |
+
Set these in HuggingFace Space β Settings β Variables and Secrets.
|
| 18 |
+
|
| 19 |
Args:
|
| 20 |
batch_size: Clusters per LLM call (default 5).
|
| 21 |
"""
|
| 22 |
import time
|
| 23 |
+
import os
|
| 24 |
+
|
| 25 |
+
# ββ NEW: import all 3 LangChain integrations ββββββββββββββββββββββββββββββ
|
| 26 |
+
from langchain_mistralai import ChatMistralAI
|
| 27 |
+
from langchain_openai import ChatOpenAI
|
| 28 |
+
from langchain_groq import ChatGroq
|
| 29 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 30 |
+
|
| 31 |
p = _p2()
|
| 32 |
clusters = json.loads(p["clusters"].read_text())
|
| 33 |
|
| 34 |
+
# ββ NEW: define 3 real LLMs (keys picked up from env automatically) βββββββ
|
| 35 |
+
COUNCIL = [
|
| 36 |
{
|
| 37 |
+
"name": "MISTRAL",
|
| 38 |
+
"model": ChatMistralAI(
|
| 39 |
+
model="mistral-small-latest",
|
| 40 |
+
temperature=0.2,
|
| 41 |
+
# api_key read from MISTRAL_API_KEY env var automatically
|
|
|
|
|
|
|
| 42 |
),
|
| 43 |
},
|
| 44 |
{
|
| 45 |
+
"name": "OPENAI",
|
| 46 |
+
"model": ChatOpenAI(
|
| 47 |
+
model="gpt-4o-mini",
|
| 48 |
+
temperature=0.2,
|
| 49 |
+
# api_key read from OPENAI_API_KEY env var automatically
|
|
|
|
|
|
|
| 50 |
),
|
| 51 |
},
|
| 52 |
{
|
| 53 |
+
"name": "GROQ",
|
| 54 |
+
"model": ChatGroq(
|
| 55 |
+
model="llama3-70b-8192",
|
| 56 |
+
temperature=0.2,
|
| 57 |
+
# api_key read from GROQ_API_KEY env var automatically
|
|
|
|
| 58 |
),
|
| 59 |
},
|
| 60 |
]
|
| 61 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 62 |
+
|
| 63 |
+
# ββ UNCHANGED: single shared prompt builder (same prompt for all 3 LLMs) ββ
|
| 64 |
+
def make_prompt(batch):
|
| 65 |
+
mini = [
|
| 66 |
+
{
|
| 67 |
+
"cluster_id": c["cluster_id"],
|
| 68 |
+
"paper_count": c["paper_count"],
|
| 69 |
+
"top3_titles": c["top3_titles"],
|
| 70 |
+
"top3_abstracts": c["top3_abstracts"],
|
| 71 |
+
}
|
| 72 |
+
for c in batch
|
| 73 |
+
]
|
| 74 |
return (
|
| 75 |
+
"You are an Information Systems research expert conducting a systematic "
|
| 76 |
+
"literature review. Label each cluster with a precise 4-7 word noun-phrase "
|
| 77 |
+
"that reflects its core IS research theme.\n\n"
|
| 78 |
+
"Cluster IDs in this batch: " + str([c["cluster_id"] for c in batch]) + "\n\n"
|
| 79 |
"CLUSTERS:\n" + json.dumps(mini, indent=2) + "\n\n"
|
| 80 |
"Return ONLY a raw JSON array β no markdown, no preamble.\n"
|
| 81 |
"Each element: cluster_id (int), label (4-7 words), "
|
| 82 |
"confidence (High/Medium/Low), reasoning (one sentence)."
|
| 83 |
)
|
| 84 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 85 |
|
| 86 |
+
# ββ NEW: run each LLM independently across all batches βββββββββββββββββββ
|
| 87 |
+
# persona_results[i] = { cluster_id: {label, confidence, reasoning} }
|
| 88 |
+
# shape is identical to before so all downstream code is UNCHANGED
|
| 89 |
persona_results = [{}, {}, {}]
|
| 90 |
+
batch_starts = list(range(0, len(clusters), batch_size))
|
| 91 |
|
| 92 |
+
for pi, member in enumerate(COUNCIL):
|
| 93 |
+
llm = member["model"]
|
| 94 |
+
llm_name = member["name"]
|
| 95 |
all_labels = []
|
| 96 |
+
|
| 97 |
+
print(f"Council member {pi+1}/3 ({llm_name}) labeling {len(clusters)} clusters...")
|
| 98 |
+
|
| 99 |
for bi, start in enumerate(batch_starts):
|
| 100 |
batch = clusters[start: start + batch_size]
|
| 101 |
+
prompt = make_prompt(batch) # same prompt for every LLM
|
| 102 |
+
|
| 103 |
+
# ββ NEW: per-model error handling so one failure doesn't kill all β
|
| 104 |
+
try:
|
| 105 |
+
result = _call_llm_json(llm, prompt)
|
| 106 |
+
all_labels.extend(result)
|
| 107 |
+
except Exception as e:
|
| 108 |
+
print(f" WARNING: {llm_name} batch {bi} failed: {e}. Using fallback labels.")
|
| 109 |
+
for c in batch:
|
| 110 |
+
all_labels.append({
|
| 111 |
+
"cluster_id": c["cluster_id"],
|
| 112 |
+
"label": f"Cluster {c['cluster_id']} ({llm_name} error)",
|
| 113 |
+
"confidence": "Low",
|
| 114 |
+
"reasoning": f"Fallback β {llm_name} error: {str(e)[:80]}",
|
| 115 |
+
})
|
| 116 |
+
# ββββββββββββββββββββββββββββββββββββββββββββοΏ½οΏ½οΏ½ββββββββββββββββββββ
|
| 117 |
+
|
| 118 |
+
# small delay between batches to respect rate limits
|
| 119 |
+
if bi < len(batch_starts) - 1:
|
| 120 |
+
time.sleep(8)
|
| 121 |
+
|
| 122 |
for item in all_labels:
|
| 123 |
cid = int(item.get("cluster_id", 0))
|
| 124 |
persona_results[pi][cid] = item
|
|
|
|
| 125 |
|
| 126 |
+
# delay between council members (Groq is fast, Mistral/OpenAI need breathing room)
|
| 127 |
+
if pi < len(COUNCIL) - 1:
|
| 128 |
+
time.sleep(10)
|
| 129 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 130 |
+
|
| 131 |
+
# ββ UNCHANGED from here down: voting + enrichment + CSV export βββββββββββ
|
| 132 |
def enrich(cluster):
|
| 133 |
cid = cluster["cluster_id"]
|
| 134 |
+
raw_votes = [
|
| 135 |
+
str(persona_results[pi].get(cid, {}).get("label", "")).strip()
|
| 136 |
+
for pi in range(3)
|
| 137 |
+
]
|
| 138 |
+
votes = [
|
| 139 |
+
v if v and v.lower() not in ("", "none", "null")
|
| 140 |
+
else "Cluster {}".format(cid)
|
| 141 |
+
for v in raw_votes
|
| 142 |
+
]
|
| 143 |
+
final = _mode_label(votes)
|
| 144 |
+
agreement = (
|
| 145 |
+
"unanimous" if len(set(votes)) == 1
|
| 146 |
+
else "majority" if votes.count(final) >= 2
|
| 147 |
+
else "split"
|
| 148 |
+
)
|
| 149 |
return {
|
| 150 |
**cluster,
|
| 151 |
"label": final,
|
| 152 |
+
"llm_vote_1_MISTRAL": votes[0], # key renamed to match real model
|
| 153 |
+
"llm_vote_2_OPENAI": votes[1], # key renamed to match real model
|
| 154 |
+
"llm_vote_3_GROQ": votes[2], # key renamed to match real model
|
| 155 |
"confidence_1": persona_results[0].get(cid, {}).get("confidence", ""),
|
| 156 |
"confidence_2": persona_results[1].get(cid, {}).get("confidence", ""),
|
| 157 |
"confidence_3": persona_results[2].get(cid, {}).get("confidence", ""),
|
|
|
|
| 170 |
cid = c["cluster_id"]
|
| 171 |
for li, paper in enumerate(c["papers"]):
|
| 172 |
rows.append({
|
| 173 |
+
"cluster_id": cid,
|
| 174 |
+
"final_label": c["label"],
|
| 175 |
+
"vote_agreement": c["vote_agreement"],
|
| 176 |
+
"llm1_MISTRAL_label": c["llm_vote_1_MISTRAL"], # renamed
|
| 177 |
+
"llm2_OPENAI_label": c["llm_vote_2_OPENAI"], # renamed
|
| 178 |
+
"llm3_GROQ_label": c["llm_vote_3_GROQ"], # renamed
|
| 179 |
+
"llm1_confidence": c["confidence_1"],
|
| 180 |
+
"llm2_confidence": c["confidence_2"],
|
| 181 |
+
"llm3_confidence": c["confidence_3"],
|
| 182 |
+
"llm1_reasoning": c["reasoning_1"],
|
| 183 |
+
"llm2_reasoning": c["reasoning_2"],
|
| 184 |
+
"llm3_reasoning": c["reasoning_3"],
|
| 185 |
+
"paper_doi": paper.get("doi", ""),
|
| 186 |
+
"paper_title": paper.get("title", ""),
|
| 187 |
+
"paper_year": paper.get("year", ""),
|
| 188 |
+
"paper_journal": paper.get("journal", ""),
|
| 189 |
+
"abstract_preview": paper.get("abstract", "")[:300],
|
| 190 |
+
"combined_preview": paper.get("combined", "")[:200],
|
| 191 |
+
"centroid_cosine_sim": round(float(
|
| 192 |
c["centroid_sims"][li] if li < len(c["centroid_sims"]) else 0.0), 4),
|
| 193 |
+
"hdbscan_probability": round(float(
|
| 194 |
c["hdbscan_probs"][li] if li < len(c["hdbscan_probs"]) else 0.0), 4),
|
| 195 |
+
"is_top3_centroid": "YES" if li in c["top3_paper_idx"] else "no",
|
| 196 |
})
|
| 197 |
|
| 198 |
pd.DataFrame(rows).to_csv(p["audit_csv"], index=False, encoding="utf-8-sig")
|
|
|
|
| 205 |
"majority": majority,
|
| 206 |
"split": len(enriched) - unanimous - majority,
|
| 207 |
"audit_csv_rows": len(rows),
|
| 208 |
+
"council_members": [m["name"] for m in COUNCIL], # NEW: visible in output
|
| 209 |
+
"note": "True 3-LLM ensemble (Mistral+OpenAI+Groq). Audit CSV ready ({} rows).".format(len(rows)),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
})
|