Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -92,21 +92,31 @@ def load_css():
|
|
| 92 |
""", unsafe_allow_html=True)
|
| 93 |
|
| 94 |
# --- Helper Functions ---
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
def encode_text_fragment(text):
|
| 96 |
# Encode text for W3C Text Fragments, preserving only hyphens
|
| 97 |
# En dashes (–) and em dashes (—) are encoded as %E2%80%93 and %E2%80%94
|
| 98 |
return urllib.parse.quote(text, safe='-')
|
| 99 |
|
| 100 |
def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
return hashlib.sha256(data.encode('utf-8')).hexdigest()
|
| 103 |
|
| 104 |
def format_citation_html(url, fragment_text, author, year, scc_hash):
|
|
|
|
| 105 |
encoded_fragment = encode_text_fragment(fragment_text)
|
| 106 |
full_url = f"{url}#:~:text={encoded_fragment}"
|
| 107 |
return f'<a href="{full_url}" data-hash="{scc_hash}">{author} ({year})</a>'
|
| 108 |
|
| 109 |
def format_metadata_html(url, author, year, scc_hash, username, task_name, current_date, current_time):
|
|
|
|
| 110 |
metadata = f"{username}β{task_name}β{current_date}β{current_time}"
|
| 111 |
encoded_metadata = encode_text_fragment(metadata)
|
| 112 |
full_url = f"{url}#:~:text={encoded_metadata}"
|
|
@@ -345,7 +355,10 @@ with tabs[0]:
|
|
| 345 |
</div>
|
| 346 |
""", unsafe_allow_html=True)
|
| 347 |
else:
|
| 348 |
-
|
|
|
|
|
|
|
|
|
|
| 349 |
citation_link_start = format_citation_html(source_url, annotated_text, author_name, publication_year, scc_hash)
|
| 350 |
citation_link_end = f'<a href="{source_url}#:~:text={encode_text_fragment(annotated_text)}" data-hash="{scc_hash}">({author_name}, {publication_year})</a>'
|
| 351 |
metadata_link = format_metadata_html(source_url, author_name, publication_year, scc_hash, username, task_name, current_date, current_time)
|
|
@@ -404,9 +417,12 @@ with tabs[1]:
|
|
| 404 |
elif citation_base_url != hash_base_url:
|
| 405 |
st.error("The citation URL and SCC index URL must point to the same base URL.")
|
| 406 |
else:
|
|
|
|
|
|
|
|
|
|
| 407 |
# Recompute hash
|
| 408 |
recomputed_hash = generate_citation_hash(
|
| 409 |
-
author, year, citation_base_url,
|
| 410 |
)
|
| 411 |
|
| 412 |
if recomputed_hash == scc_hash:
|
|
|
|
| 92 |
""", unsafe_allow_html=True)
|
| 93 |
|
| 94 |
# --- Helper Functions ---
|
| 95 |
+
def normalise_hyphens(text):
|
| 96 |
+
# Replace hyphen variants with U+002D for internal consistency
|
| 97 |
+
return text.replace('\u2011', '-').replace('\u2013', '-').replace('\u2014', '-')
|
| 98 |
+
|
| 99 |
def encode_text_fragment(text):
|
| 100 |
# Encode text for W3C Text Fragments, preserving only hyphens
|
| 101 |
# En dashes (–) and em dashes (—) are encoded as %E2%80%93 and %E2%80%94
|
| 102 |
return urllib.parse.quote(text, safe='-')
|
| 103 |
|
| 104 |
def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
|
| 105 |
+
# Normalise hyphens for consistent hash generation
|
| 106 |
+
normalised_fragment_text = normalise_hyphens(fragment_text)
|
| 107 |
+
normalised_cited_text = normalise_hyphens(cited_text)
|
| 108 |
+
normalised_task_name = normalise_hyphens(task_name)
|
| 109 |
+
data = f"{author}, {year} | {url} | {normalised_fragment_text} | {normalised_cited_text} | {username} | {normalised_task_name} | {current_date} | {current_time}"
|
| 110 |
return hashlib.sha256(data.encode('utf-8')).hexdigest()
|
| 111 |
|
| 112 |
def format_citation_html(url, fragment_text, author, year, scc_hash):
|
| 113 |
+
# Use original fragment_text for text fragment URL to match external source
|
| 114 |
encoded_fragment = encode_text_fragment(fragment_text)
|
| 115 |
full_url = f"{url}#:~:text={encoded_fragment}"
|
| 116 |
return f'<a href="{full_url}" data-hash="{scc_hash}">{author} ({year})</a>'
|
| 117 |
|
| 118 |
def format_metadata_html(url, author, year, scc_hash, username, task_name, current_date, current_time):
|
| 119 |
+
# Use original task_name with em dashes for text fragment URL
|
| 120 |
metadata = f"{username}β{task_name}β{current_date}β{current_time}"
|
| 121 |
encoded_metadata = encode_text_fragment(metadata)
|
| 122 |
full_url = f"{url}#:~:text={encoded_metadata}"
|
|
|
|
| 355 |
</div>
|
| 356 |
""", unsafe_allow_html=True)
|
| 357 |
else:
|
| 358 |
+
# Normalise hyphens in user inputs for hash generation
|
| 359 |
+
normalised_annotated_text = normalise_hyphens(annotated_text)
|
| 360 |
+
normalised_task_name = normalise_hyphens(task_name)
|
| 361 |
+
scc_hash = generate_citation_hash(author_name, publication_year, source_url, normalised_annotated_text, normalised_annotated_text, username, normalised_task_name, current_date, current_time)
|
| 362 |
citation_link_start = format_citation_html(source_url, annotated_text, author_name, publication_year, scc_hash)
|
| 363 |
citation_link_end = f'<a href="{source_url}#:~:text={encode_text_fragment(annotated_text)}" data-hash="{scc_hash}">({author_name}, {publication_year})</a>'
|
| 364 |
metadata_link = format_metadata_html(source_url, author_name, publication_year, scc_hash, username, task_name, current_date, current_time)
|
|
|
|
| 417 |
elif citation_base_url != hash_base_url:
|
| 418 |
st.error("The citation URL and SCC index URL must point to the same base URL.")
|
| 419 |
else:
|
| 420 |
+
# Normalise hyphens for hash recomputation
|
| 421 |
+
normalised_citation_fragment = normalise_hyphens(citation_fragment)
|
| 422 |
+
normalised_task_name = normalise_hyphens(task_name)
|
| 423 |
# Recompute hash
|
| 424 |
recomputed_hash = generate_citation_hash(
|
| 425 |
+
author, year, citation_base_url, normalised_citation_fragment, normalised_citation_fragment, username, normalised_task_name, date, time
|
| 426 |
)
|
| 427 |
|
| 428 |
if recomputed_hash == scc_hash:
|