mabuseif committed on
Commit
bee9486
·
verified ·
1 Parent(s): ae40509

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -3
app.py CHANGED
@@ -92,21 +92,31 @@ def load_css():
92
  """, unsafe_allow_html=True)
93
 
94
  # --- Helper Functions ---
 
 
 
 
95
  def encode_text_fragment(text):
96
  # Encode text for W3C Text Fragments, preserving only hyphens
97
  # En dashes (–) and em dashes (—) are encoded as %E2%80%93 and %E2%80%94
98
  return urllib.parse.quote(text, safe='-')
99
 
100
  def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
101
- data = f"{author}, {year} | {url} | {fragment_text} | {cited_text} | {username} | {task_name} | {current_date} | {current_time}"
 
 
 
 
102
  return hashlib.sha256(data.encode('utf-8')).hexdigest()
103
 
104
  def format_citation_html(url, fragment_text, author, year, scc_hash):
 
105
  encoded_fragment = encode_text_fragment(fragment_text)
106
  full_url = f"{url}#:~:text={encoded_fragment}"
107
  return f'<a href="{full_url}" data-hash="{scc_hash}">{author} ({year})</a>'
108
 
109
  def format_metadata_html(url, author, year, scc_hash, username, task_name, current_date, current_time):
 
110
  metadata = f"{username}β€”{task_name}β€”{current_date}β€”{current_time}"
111
  encoded_metadata = encode_text_fragment(metadata)
112
  full_url = f"{url}#:~:text={encoded_metadata}"
@@ -345,7 +355,10 @@ with tabs[0]:
345
  </div>
346
  """, unsafe_allow_html=True)
347
  else:
348
- scc_hash = generate_citation_hash(author_name, publication_year, source_url, annotated_text, annotated_text, username, task_name, current_date, current_time)
 
 
 
349
  citation_link_start = format_citation_html(source_url, annotated_text, author_name, publication_year, scc_hash)
350
  citation_link_end = f'<a href="{source_url}#:~:text={encode_text_fragment(annotated_text)}" data-hash="{scc_hash}">({author_name}, {publication_year})</a>'
351
  metadata_link = format_metadata_html(source_url, author_name, publication_year, scc_hash, username, task_name, current_date, current_time)
@@ -404,9 +417,12 @@ with tabs[1]:
404
  elif citation_base_url != hash_base_url:
405
  st.error("The citation URL and SCC index URL must point to the same base URL.")
406
  else:
 
 
 
407
  # Recompute hash
408
  recomputed_hash = generate_citation_hash(
409
- author, year, citation_base_url, citation_fragment, citation_fragment, username, task_name, date, time
410
  )
411
 
412
  if recomputed_hash == scc_hash:
 
92
  """, unsafe_allow_html=True)
93
 
94
  # --- Helper Functions ---
95
+ def normalise_hyphens(text):
96
+ # Replace hyphen variants with U+002D for internal consistency
97
+ return text.replace('\u2011', '-').replace('\u2013', '-').replace('\u2014', '-')
98
+
99
  def encode_text_fragment(text):
100
  # Encode text for W3C Text Fragments, preserving only hyphens
101
  # En dashes (–) and em dashes (—) are encoded as %E2%80%93 and %E2%80%94
102
  return urllib.parse.quote(text, safe='-')
103
 
104
  def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
105
+ # Normalise hyphens for consistent hash generation
106
+ normalised_fragment_text = normalise_hyphens(fragment_text)
107
+ normalised_cited_text = normalise_hyphens(cited_text)
108
+ normalised_task_name = normalise_hyphens(task_name)
109
+ data = f"{author}, {year} | {url} | {normalised_fragment_text} | {normalised_cited_text} | {username} | {normalised_task_name} | {current_date} | {current_time}"
110
  return hashlib.sha256(data.encode('utf-8')).hexdigest()
111
 
112
  def format_citation_html(url, fragment_text, author, year, scc_hash):
113
+ # Use original fragment_text for text fragment URL to match external source
114
  encoded_fragment = encode_text_fragment(fragment_text)
115
  full_url = f"{url}#:~:text={encoded_fragment}"
116
  return f'<a href="{full_url}" data-hash="{scc_hash}">{author} ({year})</a>'
117
 
118
  def format_metadata_html(url, author, year, scc_hash, username, task_name, current_date, current_time):
119
+ # Use original task_name with em dashes for text fragment URL
120
  metadata = f"{username}β€”{task_name}β€”{current_date}β€”{current_time}"
121
  encoded_metadata = encode_text_fragment(metadata)
122
  full_url = f"{url}#:~:text={encoded_metadata}"
 
355
  </div>
356
  """, unsafe_allow_html=True)
357
  else:
358
+ # Normalise hyphens in user inputs for hash generation
359
+ normalised_annotated_text = normalise_hyphens(annotated_text)
360
+ normalised_task_name = normalise_hyphens(task_name)
361
+ scc_hash = generate_citation_hash(author_name, publication_year, source_url, normalised_annotated_text, normalised_annotated_text, username, normalised_task_name, current_date, current_time)
362
  citation_link_start = format_citation_html(source_url, annotated_text, author_name, publication_year, scc_hash)
363
  citation_link_end = f'<a href="{source_url}#:~:text={encode_text_fragment(annotated_text)}" data-hash="{scc_hash}">({author_name}, {publication_year})</a>'
364
  metadata_link = format_metadata_html(source_url, author_name, publication_year, scc_hash, username, task_name, current_date, current_time)
 
417
  elif citation_base_url != hash_base_url:
418
  st.error("The citation URL and SCC index URL must point to the same base URL.")
419
  else:
420
+ # Normalise hyphens for hash recomputation
421
+ normalised_citation_fragment = normalise_hyphens(citation_fragment)
422
+ normalised_task_name = normalise_hyphens(task_name)
423
  # Recompute hash
424
  recomputed_hash = generate_citation_hash(
425
+ author, year, citation_base_url, normalised_citation_fragment, normalised_citation_fragment, username, normalised_task_name, date, time
426
  )
427
 
428
  if recomputed_hash == scc_hash: