mabuseif committed on
Commit
5f14b38
·
verified ·
1 Parent(s): 52a245c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -85
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import streamlit as st
2
- import streamlit.components.v1 as components
3
  import hashlib
4
  import urllib.parse
5
  from datetime import datetime
@@ -109,48 +108,46 @@ def format_metadata_html(url, author, year, scc_hash, username, task_name, curre
109
  def check_for_fragment(url):
110
  return '#:~:text=' in url
111
 
112
- def parse_citation(citation_html):
113
- match = re.match(r'<a href="([^"]+)#:~:text=([^"]+)" data-hash="([^"]+)">([^<]+) \((\d{4})\)</a>', citation_html)
 
114
  if match:
115
- url, encoded_fragment, scc_hash, author, year = match.groups()
116
- fragment_text = urllib.parse.unquote(encoded_fragment)
117
- return author, year, url, fragment_text, scc_hash
118
- return None, None, None, None, None
119
-
120
- def parse_metadata_hash(metadata_html):
121
- match = re.match(r'<a href="([^"]+)#:~:text=([^"]+)" data-hash="([^"]+)">[^<]+\(\d{4}\)\. ([^<]+)</a>', metadata_html)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  if match:
123
- url, encoded_metadata, scc_hash, same_hash = match.groups()
124
- metadata_parts = urllib.parse.unquote(encoded_metadata).split('—')
 
 
 
 
 
 
125
  if len(metadata_parts) == 4:
126
  username, task_name, date, time = metadata_parts
127
- return scc_hash, username, task_name, date, time
128
- return None, None, None, None, None
129
-
130
- # --- JavaScript for capturing pasted links ---
131
- def verification_js():
132
- return """
133
- <script>
134
- function updateCitationData(inputId, outputId) {
135
- const input = document.getElementById(inputId);
136
- const output = document.getElementById(outputId);
137
- if (input && output) {
138
- const link = input.querySelector('a');
139
- output.value = link ? link.outerHTML : '';
140
- const event = new Event('input', { bubbles: true });
141
- output.dispatchEvent(event);
142
- }
143
- }
144
-
145
- document.getElementById('citation_input').addEventListener('paste', function(e) {
146
- setTimeout(() => updateCitationData('citation_input', 'citation_output'), 100);
147
- });
148
-
149
- document.getElementById('hash_input').addEventListener('paste', function(e) {
150
- setTimeout(() => updateCitationData('hash_input', 'hash_output'), 100);
151
- });
152
- </script>
153
- """
154
 
155
  # --- Live Clock JavaScript ---
156
  def live_clock():
@@ -176,7 +173,7 @@ def live_clock():
176
  const date = `${parts[4].value}-${parts[2].value}-${parts[0].value}`;
177
  const time = `${parts[6].value}:${parts[8].value}:${parts[10].value}`;
178
  const datetimeElement = document.getElementById('live_datetime');
179
- if (datetimeElement) {
180
  datetimeElement.innerText = `${date} ${time}`;
181
  }
182
  }
@@ -185,13 +182,6 @@ def live_clock():
185
  </script>
186
  """
187
 
188
- # --- Callbacks for updating session state ---
189
- def update_citation_html():
190
- st.session_state.citation_html = st.session_state.citation_output
191
-
192
- def update_hash_html():
193
- st.session_state.hash_html = st.session_state.hash_output
194
-
195
  # --- Streamlit App ---
196
  st.set_page_config(layout="wide", page_title="Smart Context Citation Tool")
197
 
@@ -274,7 +264,7 @@ with tabs[0]:
274
 
275
  # Live date and time display
276
  st.markdown("### Current Date and Time")
277
- components.html(live_clock(), height=50)
278
 
279
  # Get current date and time in Melbourne timezone for hash generation
280
  melbourne_tz = pytz.timezone(MELBOURNE_TIMEZONE)
@@ -331,56 +321,44 @@ with tabs[1]:
331
  st.header("Verify Citation")
332
  st.markdown("""
333
  <div class="info-card">
334
- Paste the rendered citation and hash links (e.g., "Author (Year)" and "Author (Year). Hash") directly from the 'Citation Generator' tab to verify the citation's authenticity.
335
  </div>
336
  """, unsafe_allow_html=True)
337
 
338
  # Input fields for citation and hash
339
- st.markdown("""
340
- <div>
341
- <label>Paste Citation (with embedded link)</label>
342
- <div id="citation_input" contenteditable="true" style="border: 1px solid #e0e0e0; border-radius: 4px; padding: 0.5rem; min-height: 50px; margin-bottom: 1rem;"></div>
343
- <input type="hidden" id="citation_output" name="citation_output" value="">
344
- <label>Paste Hash (with embedded link)</label>
345
- <div id="hash_input" contenteditable="true" style="border: 1px solid #e0e0e0; border-radius: 4px; padding: 0.5rem; min-height: 50px; margin-bottom: 1rem;"></div>
346
- <input type="hidden" id="hash_output" name="hash_output" value="">
347
- </div>
348
- """, unsafe_allow_html=True)
349
-
350
- # Add JavaScript to capture pasted link data
351
- components.html(verification_js(), height=0)
352
 
353
- # Initialize session state
354
- if 'citation_html' not in st.session_state:
355
- st.session_state.citation_html = ''
356
- if 'hash_html' not in st.session_state:
357
- st.session_state.hash_html = ''
358
- if 'citation_output' not in st.session_state:
359
- st.session_state.citation_output = ''
360
- if 'hash_output' not in st.session_state:
361
- st.session_state.hash_output = ''
362
-
363
- # Hidden inputs with on_change callbacks
364
- st.text_input("Citation HTML", key="citation_output", value=st.session_state.citation_output, on_change=update_citation_html, label_visibility="collapsed")
365
- st.text_input("Hash HTML", key="hash_output", value=st.session_state.hash_output, on_change=update_hash_html, label_visibility="collapsed")
366
 
367
  verify_button = st.button("Verify Citation", type="primary", use_container_width=True)
368
 
369
  if verify_button:
370
- if not (st.session_state.citation_html and st.session_state.hash_html):
371
- st.error("Please paste both the citation and hash links before verifying.")
372
  else:
373
- # Parse citation
374
- author, year, url, fragment_text, citation_hash = parse_citation(st.session_state.citation_html)
375
- # Parse hash and metadata
376
- scc_hash, username, task_name, date, time = parse_metadata_hash(st.session_state.hash_html)
377
-
378
- if not all([author, year, url, fragment_text, scc_hash, username, task_name, date, time]):
379
- st.error("Invalid citation or hash format. Please ensure both inputs are correctly pasted links from the generated output.")
 
 
 
 
 
 
 
 
380
  else:
381
  # Recompute hash
382
  recomputed_hash = generate_citation_hash(
383
- author, year, url, fragment_text, fragment_text, username, task_name, date, time
384
  )
385
 
386
  if recomputed_hash == scc_hash:
 
1
  import streamlit as st
 
2
  import hashlib
3
  import urllib.parse
4
  from datetime import datetime
 
108
  def check_for_fragment(url):
109
  return '#:~:text=' in url
110
 
111
def parse_citation_text(citation_text):
    """Extract the author and year from pasted citation text.

    Two layouts are recognised:

    * narrative:      ``Author (Year)``   e.g. ``Abuseif et al. (2025)``
    * parenthetical:  ``(Author, Year)``  e.g. ``(Abuseif et al., 2025)``

    Args:
        citation_text: The citation text as pasted by the user.

    Returns:
        An ``(author, year)`` tuple of strings, or ``(None, None)`` when the
        text is empty or matches neither layout.
    """
    if not citation_text:
        return None, None
    text = citation_text.strip()

    # Narrative form first: this is the original pattern, kept unchanged so
    # every input that parsed before still yields the same result.
    match = re.match(r'(\(?.*?\)?)\s*\((\d{4})\)', text)
    if not match:
        # Parenthetical form: the year sits inside the same parentheses as the
        # author, separated by a comma, so the narrative pattern (which needs a
        # parenthesised year of its own) can never match it.
        match = re.match(r'\(\s*(.+?)\s*,\s*(\d{4})\s*\)', text)
    if not match:
        return None, None

    author, year = match.groups()
    # Drop any wrapping parentheses captured with the author name.
    author = author.strip('()').strip()
    return author, year
119
+
120
def parse_url(url):
    """Split a text-fragment URL into its base URL and decoded fragment text.

    Args:
        url: A URL that may carry a ``#:~:text=...`` text-fragment directive.

    Returns:
        A ``(base_url, fragment_text)`` tuple. ``base_url`` is everything
        before the first ``#``. ``fragment_text`` is the percent-decoded value
        of the first ``text=`` directive (up to the next ``&``), or ``None``
        when the URL has no such directive. ``(None, None)`` is returned for
        empty or non-string input.
    """
    if not url:
        return None, None
    try:
        # Both calls raise only when `url` is not a str; catching exactly
        # those errors keeps the "bad input -> (None, None)" contract without
        # also swallowing KeyboardInterrupt/SystemExit or unrelated bugs.
        match = re.search(r'#:~:text=([^&]+)', url)
        base_url = url.split('#')[0]
    except (TypeError, AttributeError):
        return None, None

    fragment_text = urllib.parse.unquote(match.group(1)) if match else None
    return base_url, fragment_text
132
+
133
def parse_hash_text(hash_text):
    """Extract the 64-character hex digest from pasted hash text.

    The expected layout is ``Author (Year). <hash>``, where ``<hash>`` is
    exactly 64 lowercase hexadecimal characters.

    Args:
        hash_text: The hash text as pasted by the user.

    Returns:
        The 64-character digest string, or ``None`` when the text is empty or
        does not contain a well-formed digest after ``(Year).``.
    """
    if not hash_text:
        return None
    # The negative lookahead rejects hex runs longer than 64 characters;
    # without it the first 64 characters of an over-long (malformed) digest
    # would be accepted as if they were a complete hash.
    match = re.match(
        r'.*?\(\d{4}\)\.\s*([0-9a-f]{64})(?![0-9a-f])',
        hash_text.strip(),
    )
    return match.group(1) if match else None
139
+
140
def parse_metadata(fragment_text):
    """Split a metadata fragment into its four em-dash-separated fields.

    The fragment is expected to look like ``username—task_name—date—time``
    (fields separated by the em dash character U+2014).

    Args:
        fragment_text: The decoded text fragment taken from the hash URL.

    Returns:
        A ``(username, task_name, date, time)`` tuple of strings, or a tuple
        of four ``None`` values when the input is empty, is not a string, or
        does not contain exactly four fields.
    """
    empty = (None, None, None, None)
    # An explicit type guard replaces the former bare `except:`: splitting a
    # str cannot fail, so the only failure mode was non-string input.
    if not fragment_text or not isinstance(fragment_text, str):
        return empty

    metadata_parts = fragment_text.split('—')
    if len(metadata_parts) != 4:
        return empty

    username, task_name, date, time = metadata_parts
    return username, task_name, date, time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
  # --- Live Clock JavaScript ---
153
  def live_clock():
 
173
  const date = `${parts[4].value}-${parts[2].value}-${parts[0].value}`;
174
  const time = `${parts[6].value}:${parts[8].value}:${parts[10].value}`;
175
  const datetimeElement = document.getElementById('live_datetime');
176
+ if datetimeElement) {
177
  datetimeElement.innerText = `${date} ${time}`;
178
  }
179
  }
 
182
  </script>
183
  """
184
 
 
 
 
 
 
 
 
185
  # --- Streamlit App ---
186
  st.set_page_config(layout="wide", page_title="Smart Context Citation Tool")
187
 
 
264
 
265
  # Live date and time display
266
  st.markdown("### Current Date and Time")
267
+ st.components.v1.html(live_clock(), height=50)
268
 
269
  # Get current date and time in Melbourne timezone for hash generation
270
  melbourne_tz = pytz.timezone(MELBOURNE_TIMEZONE)
 
321
  st.header("Verify Citation")
322
  st.markdown("""
323
  <div class="info-card">
324
+ Copy the citation text (e.g., "Abuseif et al. (2025)" or "(Abuseif et al., 2025)") and its hyperlink URL from the 'Citation Generator' tab. Similarly, copy the hash text (e.g., "Abuseif et al. (2025). <hash>") and its hyperlink URL. Paste them into the fields below to verify the citation's authenticity. To copy the URL, right-click the hyperlink and select "Copy Link Address".
325
  </div>
326
  """, unsafe_allow_html=True)
327
 
328
  # Input fields for citation and hash
329
+ st.subheader("Citation Information")
330
+ citation_text = st.text_input("Citation Text", help="Paste the citation text, e.g., 'Abuseif et al. (2025)' or '(Abuseif et al., 2025)'", placeholder="e.g., Abuseif et al. (2025)")
331
+ citation_url = st.text_input("Citation URL", help="Paste the hyperlink URL from the citation, e.g., 'https://example.com#:~:text=fragment'", placeholder="e.g., https://example.com#:~:text=fragment")
 
 
 
 
 
 
 
 
 
 
332
 
333
+ st.subheader("Hash Information")
334
+ hash_text = st.text_input("Hash Text", help="Paste the hash text, e.g., 'Abuseif et al. (2025). <hash>'", placeholder="e.g., Abuseif et al. (2025). <hash>")
335
+ hash_url = st.text_input("Hash URL", help="Paste the hyperlink URL from the hash, e.g., 'https://example.com#:~:text=metadata'", placeholder="e.g., https://example.com#:~:text=metadata")
 
 
 
 
 
 
 
 
 
 
336
 
337
  verify_button = st.button("Verify Citation", type="primary", use_container_width=True)
338
 
339
  if verify_button:
340
+ if not all([citation_text, citation_url, hash_text, hash_url]):
341
+ st.error("Please provide all fields (citation text, citation URL, hash text, hash URL) before verifying.")
342
  else:
343
+ # Parse citation text
344
+ author, year = parse_citation_text(citation_text)
345
+ # Parse citation URL
346
+ citation_base_url, citation_fragment = parse_url(citation_url)
347
+ # Parse hash text
348
+ scc_hash = parse_hash_text(hash_text)
349
+ # Parse hash URL
350
+ hash_base_url, hash_fragment = parse_url(hash_url)
351
+ # Parse metadata from hash URL fragment
352
+ username, task_name, date, time = parse_metadata(hash_fragment)
353
+
354
+ if not all([author, year, citation_base_url, citation_fragment, scc_hash, hash_base_url, username, task_name, date, time]):
355
+ st.error("Invalid input format. Ensure the citation text, URLs, and hash text are correctly pasted from the generated output.")
356
+ elif citation_base_url != hash_base_url:
357
+ st.error("The citation URL and hash URL must point to the same base URL.")
358
  else:
359
  # Recompute hash
360
  recomputed_hash = generate_citation_hash(
361
+ author, year, citation_base_url, citation_fragment, citation_fragment, username, task_name, date, time
362
  )
363
 
364
  if recomputed_hash == scc_hash: