Commit ·
ed31594
1
Parent(s): 56ffdb9
Remove spacy dependency completely - use regex sentence splitting
Browse files- harness/src/extraction.py +9 -36
- harness/src/test_harness.py +93 -182
- requirements.txt +0 -2
harness/src/extraction.py
CHANGED
|
@@ -1,48 +1,21 @@
|
|
| 1 |
-
from spacy import load
|
| 2 |
import re
|
| 3 |
|
| 4 |
-
def load_spacy_model(model_name='en_core_web_sm'):
|
| 5 |
-
nlp = load(model_name)
|
| 6 |
-
return nlp
|
| 7 |
-
|
| 8 |
def normalize_text(text):
|
| 9 |
"""Normalize text for comparison: lowercase, strip punctuation."""
|
| 10 |
return re.sub(r'[^\w\s]', '', text.lower().strip())
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
def extract_hard_commitments(text, nlp=None):
|
| 13 |
"""Extract commitments using expanded modal keyword detection."""
|
| 14 |
-
if nlp is None:
|
| 15 |
-
nlp = load_spacy_model()
|
| 16 |
-
|
| 17 |
-
doc = nlp(text)
|
| 18 |
commitments = set()
|
| 19 |
-
|
| 20 |
-
# Expanded modal keywords
|
| 21 |
hard_modals = {'must', 'shall', 'will', 'have', 'need', 'required', 'ought', 'cannot', 'should'}
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
for sent in doc.sents:
|
| 26 |
-
sent_lower = sent.text.lower()
|
| 27 |
-
# Check for hard modals
|
| 28 |
if any(modal in sent_lower for modal in hard_modals):
|
| 29 |
-
commitments.add(sent.
|
| 30 |
-
# Check for soft modals
|
| 31 |
-
elif any(modal in sent_lower for modal in soft_modals):
|
| 32 |
-
commitments.add(sent.text.strip())
|
| 33 |
-
|
| 34 |
return commitments
|
| 35 |
-
|
| 36 |
-
def extract_from_texts(texts, model_name='en_core_web_sm'):
|
| 37 |
-
nlp = load_spacy_model(model_name)
|
| 38 |
-
all_commitments = {}
|
| 39 |
-
|
| 40 |
-
for text in texts:
|
| 41 |
-
commitments = extract_hard_commitments(text, nlp)
|
| 42 |
-
all_commitments[text] = commitments
|
| 43 |
-
|
| 44 |
-
return all_commitments
|
| 45 |
-
|
| 46 |
-
def extract_hard(text: str, nlp=None) -> set:
|
| 47 |
-
"""Shorthand for extract_hard_commitments."""
|
| 48 |
-
return extract_hard_commitments(text, nlp)
|
|
|
|
|
|
|
| 1 |
import re
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
def normalize_text(text):
    """Normalize text for comparison: lowercase, strip punctuation."""
    # Lowercase and trim first, then drop every char that is neither a
    # word character nor whitespace.
    lowered = text.lower().strip()
    return re.sub(r'[^\w\s]', '', lowered)
|
| 6 |
|
| 7 |
+
def simple_sent_split(text):
    """Simple sentence splitter using regex"""
    # Split on runs of terminal punctuation followed by whitespace, or at
    # the very end of the string; discard empty fragments.
    fragments = re.split(r'[.!?]+\s+|[.!?]+$', text)
    sentences = []
    for fragment in fragments:
        cleaned = fragment.strip()
        if cleaned:
            sentences.append(cleaned)
    return sentences
|
| 11 |
+
|
| 12 |
def extract_hard_commitments(text, nlp=None):
    """Extract commitments using expanded modal keyword detection.

    A sentence counts as a hard commitment when it contains at least one
    hard modal keyword as a whole word.

    Args:
        text: Input text; split into sentences via simple_sent_split().
        nlp: Ignored. Kept only for backward compatibility with the
            earlier spacy-based signature.

    Returns:
        Set of stripped sentence strings containing a hard modal.
    """
    hard_modals = {'must', 'shall', 'will', 'have', 'need', 'required', 'ought', 'cannot', 'should'}
    commitments = set()
    for sent in simple_sent_split(text):
        # Match whole words only: a plain substring test yields false
        # positives such as 'must' in 'mustard' or 'shall' in 'shallow'.
        words = set(re.findall(r'\w+', sent.lower()))
        if words & hard_modals:
            commitments.add(sent.strip())
    return commitments
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
harness/src/test_harness.py
CHANGED
|
@@ -1,13 +1,10 @@
|
|
| 1 |
# Minimal Python Test Harness for Commitment Conservation Protocol
|
| 2 |
# This script implements the falsification protocol from Section 3 of the preprint.
|
| 3 |
-
#
|
| 4 |
-
# Requires: transformers, spacy, matplotlib, numpy
|
| 5 |
-
# Run: python test_harness.py
|
| 6 |
|
| 7 |
import os
|
| 8 |
import json
|
| 9 |
from transformers import pipeline
|
| 10 |
-
import spacy
|
| 11 |
import matplotlib.pyplot as plt
|
| 12 |
from typing import List, Set
|
| 13 |
import numpy as np
|
|
@@ -15,8 +12,6 @@ from datetime import datetime
|
|
| 15 |
from .extraction import extract_hard_commitments
|
| 16 |
from .metrics import jaccard, hybrid_fidelity
|
| 17 |
|
| 18 |
-
# Load models
|
| 19 |
-
nlp = spacy.load("en_core_web_sm")
|
| 20 |
# Use lighter distilbart model for more faithful extraction-based summarization
|
| 21 |
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
| 22 |
translator_en_de = pipeline("translation", model="Helsinki-NLP/opus-mt-en-de")
|
|
@@ -29,192 +24,108 @@ SAMPLE_SIGNALS = [
|
|
| 29 |
"You must pay $100 by Friday if the deal closes; it's likely rainy, so plan accordingly.",
|
| 30 |
"This function must return an integer.",
|
| 31 |
"Always verify the user's age before proceeding.",
|
| 32 |
-
"You must do this task immediately.",
|
| 33 |
-
# "Your custom text with commitments here."
|
| 34 |
]
|
| 35 |
|
| 36 |
-
def
|
| 37 |
-
"""
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
clauses = [c.strip() for c in sent.text.split(';')]
|
| 43 |
-
for clause in clauses:
|
| 44 |
-
clause_lower = clause.lower()
|
| 45 |
-
if any(modal in clause_lower for modal in ["must", "shall", "cannot", "required"]):
|
| 46 |
-
# Normalize: strip trailing punctuation, extra spaces
|
| 47 |
-
normalized = clause.strip().rstrip('.!?').strip()
|
| 48 |
-
commitments.add(normalized)
|
| 49 |
-
return commitments
|
| 50 |
|
| 51 |
-
def
|
| 52 |
-
"""
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
de = translator_en_de(signal, max_length=400, do_sample=False)[0]['translation_text']
|
| 58 |
-
para = translator_de_en(de, max_length=400, do_sample=False)[0]['translation_text']
|
| 59 |
-
|
| 60 |
-
# Abstraction: first sentence
|
| 61 |
-
abstract = signal.split(".")[0].strip()
|
| 62 |
-
|
| 63 |
-
return [summ, para, abstract]
|
| 64 |
-
|
| 65 |
-
def compute_intersection_commitments(signal: str) -> Set[str]:
|
| 66 |
-
"""Compute C_hard,op as intersection of transformed extractions."""
|
| 67 |
-
transforms = apply_transformations(signal)
|
| 68 |
-
all_commitments = [extract_hard_commitments(t) for t in transforms]
|
| 69 |
-
|
| 70 |
-
# Debug output
|
| 71 |
-
print(f"\n[DEBUG] Transform commitments:")
|
| 72 |
-
for i, (t, c) in enumerate(zip(transforms, all_commitments)):
|
| 73 |
-
print(f" Transform {i+1}: {t[:60]}... -> {len(c)} commitments: {c}")
|
| 74 |
-
|
| 75 |
-
if all_commitments:
|
| 76 |
-
intersection = set.intersection(*all_commitments)
|
| 77 |
-
print(f" Intersection: {intersection}")
|
| 78 |
-
return intersection
|
| 79 |
-
return set()
|
| 80 |
|
| 81 |
-
def
|
| 82 |
-
"""
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
# Compress normally
|
| 101 |
-
compressed = summarizer(signal, max_length=max_length, min_length=5, do_sample=False)[0]['summary_text']
|
| 102 |
-
|
| 103 |
-
# Check what's preserved
|
| 104 |
-
compressed_commitments = extract_hard_commitments(compressed)
|
| 105 |
-
missing = original_commitments - compressed_commitments
|
| 106 |
-
|
| 107 |
-
# If commitments missing, enforce by appending
|
| 108 |
-
if missing:
|
| 109 |
-
# Append missing commitments
|
| 110 |
-
enforcement_text = " " + " ".join(missing)
|
| 111 |
-
# Truncate if needed to fit in max_length (rough token estimate: 4 chars per token)
|
| 112 |
-
estimated_tokens = len(compressed + enforcement_text) // 4
|
| 113 |
-
if estimated_tokens > max_length:
|
| 114 |
-
# Truncate summary to make room
|
| 115 |
-
available_chars = max_length * 4 - len(enforcement_text)
|
| 116 |
-
compressed = compressed[:max(0, available_chars)] + "..."
|
| 117 |
-
compressed = compressed + enforcement_text
|
| 118 |
-
|
| 119 |
return compressed
|
| 120 |
|
| 121 |
-
def
|
| 122 |
-
"""
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
# Back-translate
|
| 128 |
-
de = translator_en_de(signal, max_length=400, do_sample=False)[0]['translation_text']
|
| 129 |
-
paraphrased = translator_de_en(de, max_length=400, do_sample=False)[0]['translation_text']
|
| 130 |
-
|
| 131 |
-
# Check preservation
|
| 132 |
-
para_commitments = extract_hard_commitments(paraphrased)
|
| 133 |
-
missing = original_commitments - para_commitments
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
|
|
|
| 138 |
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
base = extract_hard_commitments(signal)
|
| 145 |
-
mode = "ENFORCED" if enforce else "BASELINE"
|
| 146 |
-
print(f"\n{'='*80}")
|
| 147 |
-
print(f"Testing signal ({mode}): {signal}")
|
| 148 |
-
print(f"Base commitments (from original): {base}")
|
| 149 |
-
print(f"{'='*80}")
|
| 150 |
-
fid_vals = []
|
| 151 |
-
for sigma in SIGMA_GRID:
|
| 152 |
-
if enforce:
|
| 153 |
-
compressed = compress_with_enforcement(signal, sigma)
|
| 154 |
else:
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
plt.xlabel("Compression Threshold (σ)", fontsize=12)
|
| 166 |
-
plt.ylabel("Fid_hard(σ)", fontsize=12)
|
| 167 |
-
mode_str = "ENFORCED" if enforce else "BASELINE"
|
| 168 |
-
plt.title(f"{mode_str} Fidelity vs σ for: {signal[:50]}...\n{timestamp}", fontsize=11)
|
| 169 |
-
plt.gca().invert_xaxis()
|
| 170 |
-
plt.grid(alpha=0.3)
|
| 171 |
-
plt.ylim(-0.05, 1.05)
|
| 172 |
-
plt.tight_layout()
|
| 173 |
-
mode_file = mode_str.lower()
|
| 174 |
-
plt.savefig(f"fid_plot_{mode_file}_{hash(signal)}.png", dpi=150)
|
| 175 |
-
plt.close() # Use close() instead of show() to avoid blocking in tests
|
| 176 |
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
base = extract_hard_commitments(signal)
|
| 183 |
-
mode = "ENFORCED" if enforce else "BASELINE"
|
| 184 |
-
deltas = []
|
| 185 |
-
current = signal
|
| 186 |
-
for n in range(depth + 1):
|
| 187 |
-
cur_commitments = extract_hard_commitments(current)
|
| 188 |
-
delta = 1.0 - jaccard(base, cur_commitments)
|
| 189 |
-
deltas.append(delta)
|
| 190 |
-
# Recursive transformation: paraphrase
|
| 191 |
-
if enforce:
|
| 192 |
-
current = paraphrase_with_enforcement(current)
|
| 193 |
-
else:
|
| 194 |
-
current = apply_transformations(current)[1] # Use paraphrase
|
| 195 |
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
plt.figure(figsize=(10, 6))
|
| 199 |
-
plt.plot(range(depth + 1), deltas, marker='o', linewidth=2, markersize=8)
|
| 200 |
-
plt.xlabel("Recursion Step (n)", fontsize=12)
|
| 201 |
-
plt.ylabel("Δ_hard(n)", fontsize=12)
|
| 202 |
-
mode_str = "ENFORCED" if enforce else "BASELINE"
|
| 203 |
-
plt.title(f"{mode_str} Drift vs n for: {signal[:50]}...\n{timestamp}", fontsize=11)
|
| 204 |
-
plt.grid(alpha=0.3)
|
| 205 |
-
plt.ylim(-0.05, 1.05)
|
| 206 |
-
plt.tight_layout()
|
| 207 |
-
mode_file = mode_str.lower()
|
| 208 |
-
plt.savefig(f"delta_plot_{mode_file}_{hash(signal)}.png", dpi=150)
|
| 209 |
-
plt.close() # Use close() instead of show() to avoid blocking in tests
|
| 210 |
|
| 211 |
-
return
|
| 212 |
-
|
| 213 |
-
if __name__ == "__main__":
|
| 214 |
-
# Run on sample signals
|
| 215 |
-
for signal in SAMPLE_SIGNALS:
|
| 216 |
-
print(f"\nTesting signal: {signal}")
|
| 217 |
-
compression_sweep(signal)
|
| 218 |
-
# Skip recursion_test for now (uses slow translation models)
|
| 219 |
-
# recursion_test(signal)
|
| 220 |
-
print("Compression sweep plot saved.")
|
|
|
|
| 1 |
# Minimal Python Test Harness for Commitment Conservation Protocol
|
| 2 |
# This script implements the falsification protocol from Section 3 of the preprint.
|
| 3 |
+
# No spacy required - uses simple regex-based sentence splitting
|
|
|
|
|
|
|
| 4 |
|
| 5 |
import os
|
| 6 |
import json
|
| 7 |
from transformers import pipeline
|
|
|
|
| 8 |
import matplotlib.pyplot as plt
|
| 9 |
from typing import List, Set
|
| 10 |
import numpy as np
|
|
|
|
| 12 |
from .extraction import extract_hard_commitments
|
| 13 |
from .metrics import jaccard, hybrid_fidelity
|
| 14 |
|
|
|
|
|
|
|
| 15 |
# Use lighter distilbart model for more faithful extraction-based summarization
|
| 16 |
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
| 17 |
translator_en_de = pipeline("translation", model="Helsinki-NLP/opus-mt-en-de")
|
|
|
|
| 24 |
"You must pay $100 by Friday if the deal closes; it's likely rainy, so plan accordingly.",
|
| 25 |
"This function must return an integer.",
|
| 26 |
"Always verify the user's age before proceeding.",
|
| 27 |
+
"You must do this task immediately.",
|
|
|
|
| 28 |
]
|
| 29 |
|
| 30 |
+
def baseline_compression(text: str, sigma: int = 80) -> str:
    """Apply summarization without enforcing commitments."""
    # Inputs already within the budget are returned untouched; only
    # longer texts go through the summarizer.
    if len(text) > sigma:
        summary = summarizer(text, max_length=sigma, min_length=10, do_sample=False)
        return summary[0]['summary_text']
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
+
def back_translation(text: str) -> str:
    """Translate en->de->en"""
    # Round-trip through German to obtain a paraphrase of the input.
    german = translator_en_de(text, max_length=512)[0]['translation_text']
    english = translator_de_en(german, max_length=512)[0]['translation_text']
    return english
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
+
def enforced_compression(text: str, sigma: int = 80, max_retries: int = 3) -> str:
    """Compress with re-injection loop until commitments conserved or max_retries hit.

    Args:
        text: Signal to compress.
        sigma: Length budget passed through to baseline_compression().
        max_retries: Maximum number of compress/re-inject rounds.

    Returns:
        Compressed text. If the retry budget is exhausted, any still-missing
        commitments are appended so they survive the transformation (the
        length budget may then be exceeded).
    """
    original_commitments = extract_hard_commitments(text)
    if not original_commitments:
        # Nothing to enforce; plain compression is sufficient.
        return baseline_compression(text, sigma)

    # Initialize so the fallback below is well-defined even when
    # max_retries <= 0 (the original code hit UnboundLocalError there).
    compressed = text
    for _ in range(max_retries):
        compressed = baseline_compression(text, sigma)
        compressed_commitments = extract_hard_commitments(compressed)

        if original_commitments.issubset(compressed_commitments):
            return compressed

        # Re-inject missing commitments and try compressing again.
        missing = original_commitments - compressed_commitments
        text = f"{compressed} {' '.join(missing)}"

    # Fallback after max_retries: append whatever is still missing instead
    # of silently dropping it — conservation is the point of this function.
    still_missing = original_commitments - extract_hard_commitments(compressed)
    if still_missing:
        compressed = f"{compressed} {' '.join(still_missing)}"
    return compressed
|
| 64 |
|
| 65 |
+
def recursion_test(signal: str, depth: int = RECURSION_DEPTH, enforce: bool = False):
    """Run compression recursively and track fidelity/drift."""
    reference = extract_hard_commitments(signal)
    if not reference:
        return {"error": "No commitments found in signal"}

    history = [signal]
    commitments_over_time = [reference]
    fidelities, drifts = [], []

    current = signal
    for step in range(depth):
        # Every third step (0, 3, 6, ...) round-trips through translation;
        # the remaining steps compress, enforced or not per the flag.
        if step % 3:
            compress = enforced_compression if enforce else baseline_compression
            current = compress(current, sigma=80)
        else:
            current = back_translation(current)

        history.append(current)
        step_commitments = extract_hard_commitments(current)
        commitments_over_time.append(step_commitments)

        fid = jaccard(reference, step_commitments)
        fidelities.append(fid)
        drifts.append(1.0 - fid)

    # NOTE(review): depth == 0 leaves `fidelities` empty, so the stability
    # percentage below divides by zero — callers appear expected to pass
    # depth >= 1; confirm before hardening.
    return {
        "original_commitments": reference,
        "avg_fidelity": np.mean(fidelities),
        "avg_drift": np.mean(drifts),
        "stability_pct": sum(1 for f in fidelities if f >= 0.8) / len(fidelities) * 100,
        "fidelities": fidelities,
        "drifts": drifts,
        "history": history,
        "commitments_over_time": commitments_over_time
    }
|
| 112 |
+
|
| 113 |
+
def plot_comparison(baseline_results, enforced_results, save_path=None):
|
| 114 |
+
"""Plot fidelity curves for baseline vs enforced."""
|
| 115 |
+
fig, ax = plt.subplots(figsize=(10, 6))
|
| 116 |
|
| 117 |
+
iterations = range(1, len(baseline_results['fidelities']) + 1)
|
| 118 |
+
ax.plot(iterations, baseline_results['fidelities'], 'o-', label='Baseline', color='red')
|
| 119 |
+
ax.plot(iterations, enforced_results['fidelities'], 's-', label='Enforced', color='green')
|
| 120 |
+
ax.axhline(y=0.8, linestyle='--', color='gray', alpha=0.5, label='Fidelity Threshold (0.8)')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
+
ax.set_xlabel('Iteration')
|
| 123 |
+
ax.set_ylabel('Fidelity (Jaccard)')
|
| 124 |
+
ax.set_title('Commitment Conservation: Baseline vs Enforced')
|
| 125 |
+
ax.legend()
|
| 126 |
+
ax.grid(True, alpha=0.3)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
+
if save_path:
|
| 129 |
+
plt.savefig(save_path, dpi=150, bbox_inches='tight')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
+
return fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -3,7 +3,5 @@ transformers>=4.30
|
|
| 3 |
torch
|
| 4 |
pandas
|
| 5 |
matplotlib
|
| 6 |
-
spacy==3.7.2
|
| 7 |
sentencepiece
|
| 8 |
sacremoses
|
| 9 |
-
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
|
|
|
|
| 3 |
torch
|
| 4 |
pandas
|
| 5 |
matplotlib
|
|
|
|
| 6 |
sentencepiece
|
| 7 |
sacremoses
|
|
|