Spaces:
Sleeping
Sleeping
Update tools.py
Browse files
tools.py
CHANGED
|
@@ -115,7 +115,7 @@ def _split_sentences(text):
|
|
| 115 |
|
| 116 |
|
| 117 |
# ── Tool 1: load_scopus_csv ───────────────────────────────────────────────────
|
| 118 |
-
@tool
|
| 119 |
def load_scopus_csv(filepath: str) -> str:
|
| 120 |
"""Load a Scopus CSV export, count papers and sentences, apply boilerplate filtering.
|
| 121 |
Returns stats string with paper count, abstract sentence count, title sentence count.
|
|
@@ -153,7 +153,7 @@ def load_scopus_csv(filepath: str) -> str:
|
|
| 153 |
|
| 154 |
|
| 155 |
# ── Tool 2: run_bertopic_discovery ────────────────────────────────────────────
|
| 156 |
-
@tool
|
| 157 |
def run_bertopic_discovery(run_key: str, threshold: float = 0.7) -> str:
|
| 158 |
"""Embed sentences with all-MiniLM-L6-v2, cluster with AgglomerativeClustering (cosine metric),
|
| 159 |
find 5 nearest centroids per cluster, generate 4 Plotly charts. Save summaries.json + emb.npy.
|
|
@@ -289,7 +289,7 @@ def _generate_charts(cluster_data, run_key, embeddings, labels_arr):
|
|
| 289 |
|
| 290 |
|
| 291 |
# ── Tool 3: label_topics_with_llm ─────────────────────────────────────────────
|
| 292 |
-
@tool
|
| 293 |
def label_topics_with_llm(run_key: str) -> str:
|
| 294 |
"""Send top MAX_LABEL_TOPICS topics to Mistral for labelling. Each topic gets:
|
| 295 |
label, category, confidence, reasoning, niche (true/false).
|
|
@@ -345,7 +345,7 @@ JSON array:"""
|
|
| 345 |
|
| 346 |
|
| 347 |
# ── Tool 4: consolidate_into_themes ───────────────────────────────────────────
|
| 348 |
-
@tool
|
| 349 |
def consolidate_into_themes(run_key: str, theme_map: str) -> str:
|
| 350 |
"""Merge researcher-approved topic groups into consolidated themes.
|
| 351 |
Recomputes centroids, recounts sentences and papers.
|
|
@@ -400,7 +400,7 @@ def _build_theme(name, topic_ids, label_lookup):
|
|
| 400 |
|
| 401 |
|
| 402 |
# ── Tool 5: compare_with_taxonomy ─────────────────────────────────────────────
|
| 403 |
-
@tool
|
| 404 |
def compare_with_taxonomy(run_key: str) -> str:
|
| 405 |
"""Map final themes to PAJAIS 25-category taxonomy using Mistral.
|
| 406 |
Each theme gets: pajais_match (or NOVEL), match_confidence, reasoning, is_novel.
|
|
@@ -461,7 +461,7 @@ JSON array:"""
|
|
| 461 |
|
| 462 |
|
| 463 |
# ── Tool 6: generate_comparison_csv ───────────────────────────────────────────
|
| 464 |
-
@tool
|
| 465 |
def generate_comparison_csv() -> str:
|
| 466 |
"""Load themes from both abstract and title runs, create side-by-side comparison DataFrame.
|
| 467 |
Saves comparison.csv showing convergence and divergence between runs."""
|
|
@@ -509,7 +509,7 @@ def generate_comparison_csv() -> str:
|
|
| 509 |
|
| 510 |
|
| 511 |
# ── Tool 7: export_narrative ──────────────────────────────────────────────────
|
| 512 |
-
@tool
|
| 513 |
def export_narrative(run_key: str) -> str:
|
| 514 |
"""Generate a 500-word Section 7 narrative report for the literature review paper.
|
| 515 |
Uses themes and taxonomy mapping via Mistral. Saves narrative.txt.
|
|
@@ -568,4 +568,4 @@ ALL_TOOLS = [
|
|
| 568 |
compare_with_taxonomy,
|
| 569 |
generate_comparison_csv,
|
| 570 |
export_narrative,
|
| 571 |
-
]
|
|
|
|
| 115 |
|
| 116 |
|
| 117 |
# ── Tool 1: load_scopus_csv ───────────────────────────────────────────────────
|
| 118 |
+
@tool
|
| 119 |
def load_scopus_csv(filepath: str) -> str:
|
| 120 |
"""Load a Scopus CSV export, count papers and sentences, apply boilerplate filtering.
|
| 121 |
Returns stats string with paper count, abstract sentence count, title sentence count.
|
|
|
|
| 153 |
|
| 154 |
|
| 155 |
# ── Tool 2: run_bertopic_discovery ────────────────────────────────────────────
|
| 156 |
+
@tool
|
| 157 |
def run_bertopic_discovery(run_key: str, threshold: float = 0.7) -> str:
|
| 158 |
"""Embed sentences with all-MiniLM-L6-v2, cluster with AgglomerativeClustering (cosine metric),
|
| 159 |
find 5 nearest centroids per cluster, generate 4 Plotly charts. Save summaries.json + emb.npy.
|
|
|
|
| 289 |
|
| 290 |
|
| 291 |
# ── Tool 3: label_topics_with_llm ─────────────────────────────────────────────
|
| 292 |
+
@tool
|
| 293 |
def label_topics_with_llm(run_key: str) -> str:
|
| 294 |
"""Send top MAX_LABEL_TOPICS topics to Mistral for labelling. Each topic gets:
|
| 295 |
label, category, confidence, reasoning, niche (true/false).
|
|
|
|
| 345 |
|
| 346 |
|
| 347 |
# ── Tool 4: consolidate_into_themes ───────────────────────────────────────────
|
| 348 |
+
@tool
|
| 349 |
def consolidate_into_themes(run_key: str, theme_map: str) -> str:
|
| 350 |
"""Merge researcher-approved topic groups into consolidated themes.
|
| 351 |
Recomputes centroids, recounts sentences and papers.
|
|
|
|
| 400 |
|
| 401 |
|
| 402 |
# ── Tool 5: compare_with_taxonomy ─────────────────────────────────────────────
|
| 403 |
+
@tool
|
| 404 |
def compare_with_taxonomy(run_key: str) -> str:
|
| 405 |
"""Map final themes to PAJAIS 25-category taxonomy using Mistral.
|
| 406 |
Each theme gets: pajais_match (or NOVEL), match_confidence, reasoning, is_novel.
|
|
|
|
| 461 |
|
| 462 |
|
| 463 |
# ── Tool 6: generate_comparison_csv ───────────────────────────────────────────
|
| 464 |
+
@tool
|
| 465 |
def generate_comparison_csv() -> str:
|
| 466 |
"""Load themes from both abstract and title runs, create side-by-side comparison DataFrame.
|
| 467 |
Saves comparison.csv showing convergence and divergence between runs."""
|
|
|
|
| 509 |
|
| 510 |
|
| 511 |
# ── Tool 7: export_narrative ──────────────────────────────────────────────────
|
| 512 |
+
@tool
|
| 513 |
def export_narrative(run_key: str) -> str:
|
| 514 |
"""Generate a 500-word Section 7 narrative report for the literature review paper.
|
| 515 |
Uses themes and taxonomy mapping via Mistral. Saves narrative.txt.
|
|
|
|
| 568 |
compare_with_taxonomy,
|
| 569 |
generate_comparison_csv,
|
| 570 |
export_narrative,
|
| 571 |
+
]
|