Spaces:
Sleeping
Sleeping
Create ingest/utils.py
Browse files- ingest/utils.py +25 -0
ingest/utils.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def highlight_terms(text: str, query: str) -> str:
    """Wrap every occurrence of *query* in *text* with ``**`` bold markers.

    The query is matched as a literal substring (regex metacharacters are
    escaped) and case-insensitively, so a query of ``"foia"`` also
    highlights ``"FOIA"``. Each highlighted span keeps the casing found in
    *text* rather than the casing of the query.

    Args:
        text: The text to search. Returned unchanged when falsy.
        query: The literal term to highlight. When falsy, *text* is
            returned unchanged.

    Returns:
        *text* with each non-overlapping match surrounded by ``**``.
    """
    # Function-scope import keeps this block self-contained.
    import re

    if not text or not query:
        return text

    # re.escape makes the query literal; IGNORECASE lets differently-cased
    # occurrences match while group(0) preserves the text's own casing.
    pattern = re.compile(re.escape(query), re.IGNORECASE)
    return pattern.sub(lambda match: f"**{match.group(0)}**", text)
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def classify_foia_exemptions(text: str) -> list[str]:
    """Return the exemption labels whose trigger phrase appears in *text*.

    Explainable, rule-based, non-inferential: each label is emitted only
    when its fixed phrase occurs (case-insensitively) as a substring of
    *text*. Labels are returned in rule order.

    Args:
        text: The text to scan. ``None`` or an empty string matches no rule.

    Returns:
        A list of matching labels (``"b(1)"``, ``"b(2)"``, ``"b(6)"``), or
        ``["none"]`` when no rule matches.
    """
    # (trigger phrase, exemption label) pairs, checked in order.
    rules = (
        ("national security", "b(1)"),
        ("internal personnel", "b(2)"),
        ("privacy", "b(6)"),
    )

    # Guard against None/empty input instead of failing on .lower().
    if not text:
        return ["none"]

    lowered = text.lower()
    exemptions = [label for phrase, label in rules if phrase in lowered]
    return exemptions or ["none"]
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def format_citation(record: dict) -> str:
    """Build a one-line citation string from a record mapping.

    A complete record renders as
    ``<source>. "<title>". Retrieved from <url>.``. Fields that are missing,
    ``None``, or empty are omitted, so the literal text ``None`` never
    appears in the output.

    Args:
        record: Mapping read via the ``"source"``, ``"title"`` and ``"url"``
            keys; any of them may be absent.

    Returns:
        The citation segments joined by single spaces, or an empty string
        when none of the three fields is present.
    """
    source = record.get("source")
    title = record.get("title")
    url = record.get("url")

    segments = []
    if source not in (None, ""):
        segments.append(f"{source}.")
    if title not in (None, ""):
        segments.append(f'"{title}".')
    if url not in (None, ""):
        segments.append(f"Retrieved from {url}.")
    return " ".join(segments)
|