mabuseif committed on
Commit
b2b6c73
·
verified ·
1 Parent(s): 965e3a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +341 -230
app.py CHANGED
@@ -99,6 +99,8 @@ def load_css():
99
  padding: 0.8rem;
100
  margin: 0.5rem 0;
101
  border-left: 1px solid #e0e0e0;
 
 
102
  }
103
 
104
  .rendered-citation {
@@ -106,22 +108,30 @@ def load_css():
106
  font-size: 1.4rem;
107
  }
108
 
109
- .citations-table {
110
  margin: 1rem 0;
111
  width: 100%;
112
  border-collapse: collapse;
113
  }
114
 
115
- .citations-table th, .citations-table td {
116
  border: 1px solid #e0e0e0;
117
  padding: 0.5rem;
118
  text-align: left;
119
  }
120
 
121
- .citations-table th {
122
  background: #f8f8f8;
123
  font-weight: bold;
124
  }
 
 
 
 
 
 
 
 
125
  </style>
126
  """, unsafe_allow_html=True)
127
 
@@ -163,6 +173,13 @@ def format_citation_html(url, fragment_text, author, year, scc_hash):
163
  full_url = f"{url}#:~:text={encoded_fragment}"
164
  return f'<a href="{full_url}" data-hash="{scc_hash}">{author} ({year})</a>'
165
 
 
 
 
 
 
 
 
166
  def format_metadata_html(url, author, year, scc_hash, username, task_name, current_date, current_time):
167
  # Use original task_name with em dashes for text fragment URL
168
  metadata = f"{username}—{task_name}—{current_date}—{current_time}"
@@ -233,7 +250,7 @@ def get_excel_download_link(df, filename="citation_data.xlsx"):
233
 
234
  for col_idx, col in enumerate(headers):
235
  value = row[col]
236
- if col in ["Citation", "SCC Index"]:
237
  # Extract URL and display text from HTML anchor tag
238
  match = re.search(r'<a href="([^"]+)"[^>]*>([^<]+)</a>', str(value))
239
  if match:
@@ -261,39 +278,6 @@ def get_excel_download_link(df, filename="citation_data.xlsx"):
261
  href = f'<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" download="{filename}">Download citation data as Excel</a>'
262
  return href
263
 
264
- # --- Live Clock JavaScript ---
265
- def live_clock():
266
- return """
267
- <div class="datetime-display">
268
- <span id="live_datetime"></span>
269
- </div>
270
- <script>
271
- function updateClock() {
272
- const options = {
273
- timeZone: 'Australia/Melbourne',
274
- year: 'numeric',
275
- month: '2-digit',
276
- day: '2-digit',
277
- hour: '2-digit',
278
- minute: '2-digit',
279
- second: '2-digit',
280
- hour12: false
281
- };
282
- const formatter = new Intl.DateTimeFormat('en-AU', options);
283
- const now = new Date();
284
- const parts = formatter.formatToParts(now);
285
- const date = `${parts[4].value}-${parts[2].value}-${parts[0].value}`;
286
- const time = `${parts[6].value}:${parts[8].value}:${parts[10].value}`;
287
- const datetimeElement = document.getElementById('live_datetime');
288
- if (datetimeElement) {
289
- datetimeElement.innerText = `${date} ${time}`;
290
- }
291
- }
292
- updateClock();
293
- setInterval(updateClock, 1000);
294
- </script>
295
- """
296
-
297
  # --- Streamlit App ---
298
  st.set_page_config(layout="wide", page_title="Smart Context Citation Tool")
299
 
@@ -360,7 +344,7 @@ with st.expander("SCC Style Guidelines"):
360
  <li><strong>Author(s) Name:</strong> The author(s) of the source (e.g., Abuseif et al.).</li>
361
  <li><strong>Publication Year:</strong> The year of publication (e.g., 2023).</li>
362
  <li><strong>Source URL:</strong> The full URL of the source, without text fragments (e.g., https://www.sciencedirect.com/science/article/pii/S2772411523000046).</li>
363
- <li><strong>Annotated Text:</strong> The sentence or paragraph containing the information you are referencing from the source (e.g., A proposed design framework for green roof settings in general and trees on buildings in particular).</li>
364
  </ul>
365
  </li>
366
  <li><strong>Generate Citation:</strong> Click the &quot;Generate Citation&quot; button.</li>
@@ -377,228 +361,355 @@ with st.expander("SCC Style Guidelines"):
377
  <ol>
378
  <li>Paste the reference directly in the appropriate place within your document.</li>
379
  <li>Create an SCC Index (instead of a traditional reference list), and paste the corresponding SCC Index entry for each reference you’ve used.</li>
 
380
  </ol>
381
  <h4>Verifying Citations (for Markers and Reviewers)</h4>
382
  <ol>
383
- <li><strong>Access the Tool:</strong> Open the &quot;Verify Citation&quot; tab.</li>
384
- <li><strong>Enter Citation Information:</strong>
385
- <ul>
386
- <li><strong>Citation Text:</strong> Paste the citation text (e.g., Abuseif et al. (2023) or (Abuseif et al., 2023)).</li>
387
- <li><strong>Citation URL:</strong> Paste the hyperlink URL from the citation (right-click and select &quot;Copy Link Address&quot;).</li>
388
- </ul>
389
- </li>
390
- <li><strong>Enter SCC Index Information:</strong>
391
  <ul>
392
- <li><strong>SCC Index Text:</strong> Paste the index text (e.g., Abuseif et al. (2023). cda7ba19e51e430107e58696758fdf79b8f016d8f27e8f8691ad713e7c8bc668).</li>
393
- <li><strong>SCC Index URL:</strong> Paste the hyperlink URL from the index (right-click and select &quot;Copy Link Address&quot;).</li>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  </ul>
395
  </li>
396
- <li><strong>Verify Citation:</strong> Click the &quot;Verify Citation&quot; button.</li>
397
- <li><strong>Review Result:</strong>
398
  <ul>
399
- <li><strong>Authentic Citation:</strong> Displayed in green if the hash matches, confirming integrity.</li>
400
- <li><strong>Unauthentic Citation:</strong> Displayed in red if the hash does not match, indicating potential tampering.</li>
 
 
 
 
 
 
401
  </ul>
402
  </li>
403
  </ol>
404
  </div>
405
  """, unsafe_allow_html=True)
406
 
 
407
  tabs = st.tabs(["Citation Generator", "Verify Citation"])
408
 
 
409
  with tabs[0]:
410
  st.markdown('<div class="tab-content">', unsafe_allow_html=True)
411
-
412
- # User Information Section
413
  st.subheader("User Information")
414
- col1, col2 = st.columns(2)
415
- with col1:
416
- username = st.text_input("Username", help="Your username for tracking purposes", placeholder="e.g., Majed")
417
- with col2:
418
- task_name = st.text_input("Task Name", help="The name of the task or project", placeholder="e.g., Design Strategies for Trees on Buildings")
419
-
420
- # Citation Info Section
421
- st.subheader("Citation Info")
422
- col3, col4 = st.columns(2)
423
- with col3:
424
- author_name = st.text_input("Author(s) Name", help="The author(s) of the source", placeholder="e.g., Abuseif et al.")
425
- with col4:
426
- publication_year = st.text_input("Publication Year", help="The year of publication", placeholder="e.g., 2023")
427
 
428
- col5, col6 = st.columns(2)
429
- with col5:
430
- source_url = st.text_input("Source URL", help="The full URL of the source", placeholder="https://www.sciencedirect.com/science/article/pii/S2772411523000046")
431
- with col6:
432
- annotated_text = st.text_input("Annotated Text", help="The sentence or paragraph containing the information you are referencing from the source", placeholder="e.g., A proposed design framework for green roof settings...")
433
-
434
- # Live date and time display
435
- st.markdown("### Current Date and Time (AEST)")
436
- st.components.v1.html(live_clock(), height=50)
437
-
438
- # Get current date and time in Melbourne timezone for hash generation
439
- melbourne_tz = pytz.timezone(MELBOURNE_TIMEZONE)
440
- current_datetime_melbourne = datetime.now(melbourne_tz)
441
- current_date = current_datetime_melbourne.strftime("%Y-%m-%d")
442
- current_time = current_datetime_melbourne.strftime("%H:%M:%S")
443
-
444
  generate_button = st.button("Generate Citation", type="primary", use_container_width=True)
445
-
446
  if generate_button:
 
447
  if not all([username, task_name, author_name, publication_year, source_url, annotated_text]):
448
  st.error("Please fill in all fields before generating a citation.")
 
 
 
 
449
  elif check_for_fragment(source_url):
450
- st.markdown("""
451
- <div class="warning-box">
452
- <strong>Warning:</strong> Your URL already contains a text fragment, which suggests you may have used AI assistance in your research. Please revisit the source, review the context carefully, and copy the source link again—ensuring it does not include any existing fragments.
453
- </div>
454
- """, unsafe_allow_html=True)
455
  else:
456
- # Generate hash using normalized inputs
457
- scc_hash = generate_citation_hash(author_name, publication_year, source_url, annotated_text, annotated_text, username, task_name, current_date, current_time)
458
- citation_link_start = format_citation_html(source_url, annotated_text, author_name, publication_year, scc_hash)
459
- # Use the longest segment for the end-of-text citation link
460
- selected_fragment = select_longest_segment(annotated_text)
461
- citation_link_end = f'<a href="{source_url}#:~:text={encode_text_fragment(selected_fragment)}" data-hash="{scc_hash}">({author_name}, {publication_year})</a>'
462
- metadata_link = format_metadata_html(source_url, author_name, publication_year, scc_hash, username, task_name, current_date, current_time)
463
-
464
- # --- Persistent Table with Clickable SCC Hash ---
465
-
466
- # First, ensure session state is initialized for the citation DataFrame
467
- if 'citation_df' not in st.session_state:
468
- st.session_state.citation_df = pd.DataFrame(columns=[
469
- "Username", "Task Name", "Time", "Date",
470
- "Citation", "SCC Index", "Annotated Text"
471
- ])
472
-
473
- # Create clickable HTML for SCC Index (full metadata link)
474
- clickable_index = metadata_link
475
-
476
- # Create new row data
477
- new_row = {
478
- "Username": username,
479
- "Task Name": task_name,
480
- "Time": current_time,
481
- "Date": current_date,
482
- "Citation": citation_link_start,
483
- "SCC Index": clickable_index,
484
- "Annotated Text": annotated_text
485
- }
486
-
487
- # Append the new row to the session state DataFrame
488
- new_df = pd.DataFrame([new_row])
489
- st.session_state.citation_df = pd.concat([st.session_state.citation_df, new_df], ignore_index=True)
490
-
491
- # Get the accumulated DataFrame for display and download
492
- df = st.session_state.citation_df
493
-
494
- col_html1, col_html2 = st.columns(2)
495
-
496
- # HTML Citation - Start of Text
497
- with col_html1:
498
- st.markdown("### Citation (Start of Text)")
499
- st.markdown('<div class="rendered-citation">', unsafe_allow_html=True)
500
- st.markdown(citation_link_start, unsafe_allow_html=True)
501
- st.markdown('</div>', unsafe_allow_html=True)
502
-
503
- # HTML Citation - End of Text
504
- with col_html2:
505
- st.markdown("### Citation (End of Text)")
506
- st.markdown('<div class="rendered-citation">', unsafe_allow_html=True)
507
- st.markdown(citation_link_end, unsafe_allow_html=True)
508
- st.markdown('</div>', unsafe_allow_html=True)
509
-
510
- # SCC Index
511
- st.markdown("### SCC Index")
512
- st.markdown(metadata_link, unsafe_allow_html=True)
513
-
514
- # Display table after SCC Index
515
- st.markdown("### Citations Table")
516
- st.markdown(get_excel_download_link(df), unsafe_allow_html=True, help="New feature to help you track your citations data. Please make sure to click on the 'Enable Editing' message at the top of the file when you open it in Excel to be able to click and copy the hyperlinked citations correctly.")
517
- st.markdown(df.to_html(classes="citations-table", index=False, escape=False), unsafe_allow_html=True)
518
-
519
- st.markdown('</div>', unsafe_allow_html=True)
520
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
  with tabs[1]:
522
  st.markdown('<div class="tab-content">', unsafe_allow_html=True)
523
- st.subheader("Citation Information")
524
- citation_text = st.text_input("Citation Text", help="Paste the citation text, e.g., 'Abuseif et al. (2023)' or '(Abuseif et al., 2023)'", placeholder="e.g., Abuseif et al. (2023)")
525
- citation_url = st.text_input("Citation URL", help="Paste the hyperlink URL from the citation, e.g., 'https://www.sciencedirect.com/science/article/pii/S2772411523000046#:~:text=fragment'", placeholder="e.g., https://www.sciencedirect.com/science/article/pii/S2772411523000046#:~:text=fragment")
526
-
527
- st.subheader("SCC Index")
528
- hash_text = st.text_input("SCC Index Text", help="Paste the index text, e.g., 'Abuseif et al. (2023). <hash>'", placeholder="e.g., Abuseif et al. (2023). <hash>")
529
- hash_url = st.text_input("SCC Index URL", help="Paste the hyperlink URL from the index, e.g., 'https://www.sciencedirect.com/science/article/pii/S2772411523000046#:~:text=metadata'", placeholder="e.g., https://www.sciencedirect.com/science/article/pii/S2772411523000046#:~:text=metadata")
530
-
531
- verify_button = st.button("Verify Citation", type="primary", use_container_width=True)
532
-
533
- if verify_button:
534
- if not all([citation_text, citation_url, hash_text, hash_url]):
535
- st.error("Please provide all fields (citation text, citation URL, SCC index text, SCC index URL) before verifying.")
536
- else:
537
- # Parse citation text
538
- author, year = parse_citation_text(citation_text)
539
- # Parse citation URL
540
- citation_base_url, citation_fragment = parse_url(citation_url)
541
- # Parse hash text
542
- scc_hash = parse_hash_text(hash_text)
543
- # Parse hash URL
544
- hash_base_url, hash_fragment = parse_url(hash_url)
545
- # Parse metadata from hash URL fragment
546
- username, task_name, date, time = parse_metadata(hash_fragment)
547
 
548
- if not all([author, year, citation_base_url, citation_fragment, scc_hash, hash_base_url, username, task_name, date, time]):
549
- st.error("Invalid input format. Ensure the citation text, URLs, and SCC index text are correctly pasted from the generated output.")
550
- elif citation_base_url != hash_base_url:
551
- st.error("The citation URL and SCC index URL must point to the same base URL.")
 
 
 
 
 
 
 
 
 
 
552
  else:
553
- # Normalize inputs by stripping whitespace
554
- citation_fragment = citation_fragment.strip()
555
- task_name = task_name.strip()
556
- # Check for potential truncation
557
- if len(citation_fragment) < 20:
558
- st.markdown("""
559
- <div class="warning-box">
560
- <strong>Warning:</strong> The citation text fragment may be truncated, which could cause verification to fail.
561
- </div>
562
- """, unsafe_allow_html=True)
563
- selected_citation_fragment = select_longest_segment(citation_fragment)
564
- # Recompute hash
565
- recomputed_hash = generate_citation_hash(
566
- author, year, citation_base_url, selected_citation_fragment, selected_citation_fragment, username, task_name, date, time
567
- )
568
-
569
- if recomputed_hash == scc_hash:
570
- st.markdown("""
571
- <div class="success-box">
572
- <strong>Authentic citation!</strong>
573
- </div>
574
- """, unsafe_allow_html=True)
575
-
576
- # Create DataFrame for citation details
577
- citation_data = {
578
- "Username": [username],
579
- "Task Name": [task_name],
580
- "Time": [time],
581
- "Date": [date],
582
- "URL": [citation_base_url],
583
- "Author(s) Name": [author],
584
- "Year": [year],
585
- "Annotated Text": [citation_fragment]
586
- }
587
- df = pd.DataFrame(citation_data)
588
-
589
- # Display table
590
- st.markdown("### Citations Details")
591
- st.markdown(df.to_html(classes="citations-table", index=False), unsafe_allow_html=True)
592
-
593
- # Provide download link
594
- st.markdown(get_table_download_link(df), unsafe_allow_html=True)
595
  else:
596
- st.markdown("""
597
- <div class="warning-box">
598
- <strong>Unauthentic citation</strong>
599
- </div>
600
- """, unsafe_allow_html=True)
601
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
602
  st.markdown('</div>', unsafe_allow_html=True)
603
 
604
  # Footer
 
99
  padding: 0.8rem;
100
  margin: 0.5rem 0;
101
  border-left: 1px solid #e0e0e0;
102
+ font-family: 'Courier New', monospace;
103
+ font-size: 1rem;
104
  }
105
 
106
  .rendered-citation {
 
108
  font-size: 1.4rem;
109
  }
110
 
111
+ .citation-table {
112
  margin: 1rem 0;
113
  width: 100%;
114
  border-collapse: collapse;
115
  }
116
 
117
+ .citation-table th, .citation-table td {
118
  border: 1px solid #e0e0e0;
119
  padding: 0.5rem;
120
  text-align: left;
121
  }
122
 
123
+ .citation-table th {
124
  background: #f8f8f8;
125
  font-weight: bold;
126
  }
127
+
128
+ .citation-table th:nth-child(7), .citation-table td:nth-child(7) { /* Annotated Text column */
129
+ width: 30%; /* Match SCC Index width */
130
+ }
131
+
132
+ .citation-table th:nth-child(6), .citation-table td:nth-child(6) { /* SCC Index column */
133
+ width: 30%;
134
+ }
135
  </style>
136
  """, unsafe_allow_html=True)
137
 
 
173
  full_url = f"{url}#:~:text={encoded_fragment}"
174
  return f'<a href="{full_url}" data-hash="{scc_hash}">{author} ({year})</a>'
175
 
176
+ def format_citation_end_html(url, fragment_text, author, year, scc_hash):
177
+ # Select the longest segment for the text fragment to avoid breaking the link
178
+ selected_fragment = select_longest_segment(fragment_text)
179
+ encoded_fragment = encode_text_fragment(selected_fragment)
180
+ full_url = f"{url}#:~:text={encoded_fragment}"
181
+ return f'<a href="{full_url}" data-hash="{scc_hash}">({author}, {year})</a>'
182
+
183
  def format_metadata_html(url, author, year, scc_hash, username, task_name, current_date, current_time):
184
  # Use original task_name with em dashes for text fragment URL
185
  metadata = f"{username}—{task_name}—{current_date}—{current_time}"
 
250
 
251
  for col_idx, col in enumerate(headers):
252
  value = row[col]
253
+ if col in ["Citation (Start of Text)", "Citation (End of Text)", "SCC Index"]:
254
  # Extract URL and display text from HTML anchor tag
255
  match = re.search(r'<a href="([^"]+)"[^>]*>([^<]+)</a>', str(value))
256
  if match:
 
278
  href = f'<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" download="{filename}">Download citation data as Excel</a>'
279
  return href
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  # --- Streamlit App ---
282
  st.set_page_config(layout="wide", page_title="Smart Context Citation Tool")
283
 
 
344
  <li><strong>Author(s) Name:</strong> The author(s) of the source (e.g., Abuseif et al.).</li>
345
  <li><strong>Publication Year:</strong> The year of publication (e.g., 2023).</li>
346
  <li><strong>Source URL:</strong> The full URL of the source, without text fragments (e.g., https://www.sciencedirect.com/science/article/pii/S2772411523000046).</li>
347
+ <li><strong>Annotated Text:</strong> The sentence or paragraph containing the information you are referencing from the source (e.g., A proposed design framework for green roof settings in general and trees on buildings in particular). Limited to 100 words to ensure reasonable processing.</li>
348
  </ul>
349
  </li>
350
  <li><strong>Generate Citation:</strong> Click the &quot;Generate Citation&quot; button.</li>
 
361
  <ol>
362
  <li>Paste the reference directly in the appropriate place within your document.</li>
363
  <li>Create an SCC Index (instead of a traditional reference list), and paste the corresponding SCC Index entry for each reference you’ve used.</li>
364
+ <li>You can download the citation table to facilitate reference tracking for your study and research. You may also submit it as an appendix with your work.</li>
365
  </ol>
366
  <h4>Verifying Citations (for Markers and Reviewers)</h4>
367
  <ol>
368
+ <li><strong>Access the Tool:</strong> Open the &quot;Verify Citation&quot; tab, which provides two options for verification: Manual Verification or Excel Upload Verification (automated using the citation table).</li>
369
+ <li><strong>Manual Verification:</strong>
 
 
 
 
 
 
370
  <ul>
371
+ <li><strong>Enter Citation Information:</strong>
372
+ <ul>
373
+ <li><strong>Citation Text:</strong> Paste the citation text (e.g., Abuseif et al. (2023) or (Abuseif et al., 2023)).</li>
374
+ <li><strong>Citation URL:</strong> Paste the hyperlink URL from the citation (right-click and select &quot;Copy Link Address&quot;).</li>
375
+ </ul>
376
+ </li>
377
+ <li><strong>Enter SCC Index Information:</strong>
378
+ <ul>
379
+ <li><strong>SCC Index Text:</strong> Paste the index text (e.g., Abuseif et al. (2023). cda7ba19e51e430107e58696758fdf79b8f016d8f27e8f8691ad713e7c8bc668).</li>
380
+ <li><strong>SCC Index URL:</strong> Paste the hyperlink URL from the index (right-click and select &quot;Copy Link Address&quot;).</li>
381
+ </ul>
382
+ </li>
383
+ <li><strong>Verify Citation:</strong> Click the &quot;Verify Citation&quot; button in the Manual Verification tab.</li>
384
+ <li><strong>Review Result:</strong>
385
+ <ul>
386
+ <li><strong>Authentic Citation:</strong> Displayed in green if the hash matches, confirming integrity.</li>
387
+ <li><strong>Unauthentic Citation:</strong> Displayed in red if the hash does not match, indicating potential tampering.</li>
388
+ </ul>
389
+ </li>
390
  </ul>
391
  </li>
392
+ <li><strong>Automated Verification Using Citation Table:</strong>
 
393
  <ul>
394
+ <li><strong>Upload Excel File:</strong> In the Excel Upload Verification tab, upload the Excel file generated from the Citation Generator tab.</li>
395
+ <li><strong>Verify Citations:</strong> Click the &quot;Verify Citations from Excel&quot; button.</li>
396
+ <li><strong>Review Results:</strong> View the verification results in a table, with each citation marked as:
397
+ <ul>
398
+ <li><strong>Authenticated:</strong> If the hash matches, confirming integrity.</li>
399
+ <li><strong>Unauthenticated:</strong> If the hash does not match, indicating potential issues.</li>
400
+ </ul>
401
+ </li>
402
  </ul>
403
  </li>
404
  </ol>
405
  </div>
406
  """, unsafe_allow_html=True)
407
 
408
+ # Tabs for Citation Generator and Verify Citation
409
  tabs = st.tabs(["Citation Generator", "Verify Citation"])
410
 
411
+ # --- Citation Generator Tab ---
412
  with tabs[0]:
413
  st.markdown('<div class="tab-content">', unsafe_allow_html=True)
414
+
415
+ # User Information
416
  st.subheader("User Information")
417
+ col_user1, col_user2 = st.columns(2)
418
+ with col_user1:
419
+ username = st.text_input("Username", help="Enter your username", placeholder="e.g., Majed")
420
+ with col_user2:
421
+ task_name = st.text_input("Task Name", help="Enter the project or assignment name", placeholder="e.g., Design Strategies for Trees on Buildings")
 
 
 
 
 
 
 
 
422
 
423
+ # Citation Information
424
+ st.subheader("Citation Information")
425
+ col_citation1, col_citation2 = st.columns(2)
426
+ with col_citation1:
427
+ author_name = st.text_input("Author(s) Name", help="Enter the author(s) name", placeholder="e.g., Abuseif et al.")
428
+ publication_year = st.text_input("Publication Year", help="Enter the publication year", placeholder="e.g., 2023")
429
+ source_url = st.text_input("Source URL", help="Enter the full URL of the source (without text fragments)", placeholder="e.g., https://www.sciencedirect.com/science/article/pii/S2772411523000046")
430
+ with col_citation2:
431
+ annotated_text = st.text_area("Annotated Text", help="Enter the sentence or paragraph containing the referenced information (maximum 100 words)", placeholder="e.g., A proposed design framework for green roof settings...", height=150)
432
+
433
+ # Generate Citation Button
 
 
 
 
 
434
  generate_button = st.button("Generate Citation", type="primary", use_container_width=True)
435
+
436
  if generate_button:
437
+ # Validate inputs
438
  if not all([username, task_name, author_name, publication_year, source_url, annotated_text]):
439
  st.error("Please fill in all fields before generating a citation.")
440
+ elif not re.match(r'https?://[^\s]+', source_url):
441
+ st.error("Please enter a valid URL starting with http:// or https://")
442
+ elif not re.match(r'^\d{4}$', publication_year):
443
+ st.error("Please enter a valid 4-digit publication year.")
444
  elif check_for_fragment(source_url):
445
+ st.error("The source URL should not contain a text fragment (e.g., #:~:text=).")
 
 
 
 
446
  else:
447
+ # Check word count for Annotated Text
448
+ word_count = len(annotated_text.split())
449
+ if word_count > 100:
450
+ st.error("Annotated Text exceeds the maximum limit of 100 words. Please reduce the text.")
451
+ else:
452
+ # Get current date and time in Melbourne timezone
453
+ melbourne_tz = pytz.timezone(MELBOURNE_TIMEZONE)
454
+ current_time = datetime.now(melbourne_tz).strftime("%H:%M:%S")
455
+ current_date = datetime.now(melbourne_tz).strftime("%Y-%m-%d")
456
+
457
+ # Generate citation hash
458
+ scc_hash = generate_citation_hash(
459
+ author_name, publication_year, source_url, annotated_text, annotated_text,
460
+ username, task_name, current_date, current_time
461
+ )
462
+
463
+ # Generate HTML citations
464
+ citation_link_start = format_citation_html(source_url, annotated_text, author_name, publication_year, scc_hash)
465
+ citation_link_end = format_citation_end_html(source_url, annotated_text, author_name, publication_year, scc_hash)
466
+ metadata_link = format_metadata_html(source_url, author_name, publication_year, scc_hash, username, task_name, current_date, current_time)
467
+
468
+ # --- Persistent Table with Clickable SCC Hash ---
469
+
470
+ # First, ensure session state is initialized for the citation DataFrame
471
+ if 'citation_df' not in st.session_state:
472
+ st.session_state.citation_df = pd.DataFrame(columns=[
473
+ "Username", "Task Name", "Time", "Date", "URL",
474
+ "Citation (Start of Text)", "Citation (End of Text)", "SCC Index", "Annotated Text"
475
+ ])
476
+
477
+ # Create clickable HTML for SCC Index (full metadata link)
478
+ clickable_index = metadata_link
479
+
480
+ # Create new row data
481
+ new_row = {
482
+ "Username": username,
483
+ "Task Name": task_name,
484
+ "Time": current_time,
485
+ "Date": current_date,
486
+ "URL": source_url,
487
+ "Citation (Start of Text)": citation_link_start,
488
+ "Citation (End of Text)": citation_link_end,
489
+ "SCC Index": clickable_index,
490
+ "Annotated Text": annotated_text
491
+ }
492
+
493
+ # Append the new row to the session state DataFrame
494
+ new_df = pd.DataFrame([new_row])
495
+ st.session_state.citation_df = pd.concat([st.session_state.citation_df, new_df], ignore_index=True)
496
+
497
+ # Get the accumulated DataFrame for display and download
498
+ df = st.session_state.citation_df
499
+
500
+ col_html1, col_html2 = st.columns(2)
501
+
502
+ # HTML Citation - Start of Text
503
+ with col_html1:
504
+ st.markdown("### Citation (Start of Text)")
505
+ st.markdown('<div class="rendered-citation">', unsafe_allow_html=True)
506
+ st.markdown(citation_link_start, unsafe_allow_html=True)
507
+ st.markdown('</div>', unsafe_allow_html=True)
508
+
509
+ # HTML Citation - End of Text
510
+ with col_html2:
511
+ st.markdown("### Citation (End of Text)")
512
+ st.markdown('<div class="rendered-citation">', unsafe_allow_html=True)
513
+ st.markdown(citation_link_end, unsafe_allow_html=True)
514
+ st.markdown('</div>', unsafe_allow_html=True)
515
+
516
+ # SCC Index
517
+ st.markdown("### SCC Index")
518
+ st.markdown(metadata_link, unsafe_allow_html=True)
519
+
520
+ # Display table after SCC Index
521
+ st.markdown("### Citation Table")
522
+ st.markdown(get_excel_download_link(df, "citation_data.xlsx"), unsafe_allow_html=True, help="New feature to help you track your citations data. Please make sure to click on the 'Enable Editing' message at the top of the file when you open it in Excel to be able to click and copy the hyperlinked citations correctly.")
523
+ # Display table with original columns
524
+ display_df = df[["Username", "Task Name", "Time", "Date", "Citation (Start of Text)", "SCC Index", "Annotated Text"]]
525
+ st.markdown(display_df.to_html(classes="citation-table", index=False, escape=False), unsafe_allow_html=True)
526
+
527
+ st.markdown('</div>', unsafe_allow_html=True)
528
+
529
+ # --- Verify Citation Tab ---
530
  with tabs[1]:
531
  st.markdown('<div class="tab-content">', unsafe_allow_html=True)
532
+ verify_tabs = st.tabs(["Manual Verification", "Excel Upload Verification"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
533
 
534
+ with verify_tabs[0]:
535
+ st.subheader("Citation Information")
536
+ citation_text = st.text_input("Citation Text", help="Paste the citation text, e.g., 'Abuseif et al. (2023)' or '(Abuseif et al., 2023)'", placeholder="e.g., Abuseif et al. (2023)", key="manual_citation_text")
537
+ citation_url = st.text_input("Citation URL", help="Paste the hyperlink URL from the citation, e.g., 'https://www.sciencedirect.com/science/article/pii/S2772411523000046#:~:text=fragment'", placeholder="e.g., https://www.sciencedirect.com/science/article/pii/S2772411523000046#:~:text=fragment", key="manual_citation_url")
538
+
539
+ st.subheader("SCC Index")
540
+ hash_text = st.text_input("SCC Index Text", help="Paste the index text, e.g., 'Abuseif et al. (2023). <hash>'", placeholder="e.g., Abuseif et al. (2023). <hash>", key="manual_hash_text")
541
+ hash_url = st.text_input("SCC Index URL", help="Paste the hyperlink URL from the index, e.g., 'https://www.sciencedirect.com/science/article/pii/S2772411523000046#:~:text=metadata'", placeholder="e.g., https://www.sciencedirect.com/science/article/pii/S2772411523000046#:~:text=metadata", key="manual_hash_url")
542
+
543
+ verify_button = st.button("Verify Citation", type="primary", use_container_width=True, key="manual_verify_button")
544
+
545
+ if verify_button:
546
+ if not all([citation_text, citation_url, hash_text, hash_url]):
547
+ st.error("Please provide all fields (citation text, citation URL, SCC index text, SCC index URL) before verifying.")
548
  else:
549
+ # Parse citation text
550
+ author, year = parse_citation_text(citation_text)
551
+ # Parse citation URL
552
+ citation_base_url, citation_fragment = parse_url(citation_url)
553
+ # Parse hash text
554
+ scc_hash = parse_hash_text(hash_text)
555
+ # Parse hash URL
556
+ hash_base_url, hash_fragment = parse_url(hash_url)
557
+ # Parse metadata from hash URL fragment
558
+ username, task_name, date, time = parse_metadata(hash_fragment)
559
+
560
+ if not all([author, year, citation_base_url, citation_fragment, scc_hash, hash_base_url, username, task_name, date, time]):
561
+ st.error("Invalid input format. Ensure the citation text, URLs, and SCC index text are correctly pasted from the generated output.")
562
+ elif citation_base_url != hash_base_url:
563
+ st.error("The citation URL and SCC index URL must point to the same base URL.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
564
  else:
565
+ # Normalize inputs by stripping whitespace
566
+ citation_fragment = citation_fragment.strip()
567
+ task_name = task_name.strip()
568
+ # Check for potential truncation
569
+ if len(citation_fragment) < 20:
570
+ st.markdown("""
571
+ <div class="warning-box">
572
+ <strong>Warning:</strong> The citation text fragment may be truncated, which could cause verification to fail.
573
+ </div>
574
+ """, unsafe_allow_html=True)
575
+ selected_citation_fragment = select_longest_segment(citation_fragment)
576
+ # Recompute hash
577
+ recomputed_hash = generate_citation_hash(
578
+ author, year, citation_base_url, selected_citation_fragment, selected_citation_fragment,
579
+ username, task_name, date, time
580
+ )
581
+
582
+ if recomputed_hash == scc_hash:
583
+ st.markdown("""
584
+ <div class="success-box">
585
+ <strong>Authentic citation!</strong>
586
+ </div>
587
+ """, unsafe_allow_html=True)
588
+
589
+ # Create DataFrame for citation details
590
+ citation_data = {
591
+ "Username": [username],
592
+ "Task Name": [task_name],
593
+ "Time": [time],
594
+ "Date": [date],
595
+ "URL": [citation_base_url],
596
+ "Author(s) Name": [author],
597
+ "Year": [year],
598
+ "Annotated Text": [citation_fragment]
599
+ }
600
+ df = pd.DataFrame(citation_data)
601
+
602
+ # Display table
603
+ st.markdown("### Citations Details")
604
+ st.markdown(df.to_html(classes="citations-table", index=False), unsafe_allow_html=True)
605
+
606
+ # Provide download link
607
+ st.markdown(get_table_download_link(df), unsafe_allow_html=True)
608
+ else:
609
+ st.markdown("""
610
+ <div class="warning-box">
611
+ <strong>Unauthentic citation</strong>
612
+ </div>
613
+ """, unsafe_allow_html=True)
614
+
615
with verify_tabs[1]:
    # Batch verification tab: read an uploaded workbook, recompute the SCC
    # hash for every data row and report a per-row
    # "Authenticated" / "Unauthenticated" status.
    st.subheader("Upload Excel File")
    uploaded_file = st.file_uploader(
        "Choose an Excel file",
        type=["xlsx"],
        help="Upload the Excel file containing citation data (generated from the Citation Generator tab).",
    )

    verify_excel_button = st.button(
        "Verify Citations from Excel",
        type="primary",
        use_container_width=True,
        key="excel_verify_button",
    )

    if verify_excel_button:
        if not uploaded_file:
            st.error("Please upload an Excel file before verifying.")
        else:
            # Deliberately broad handler: any failure while reading or
            # verifying the upload is reported in the UI instead of
            # aborting the whole script run.
            try:
                df = pd.read_excel(uploaded_file)
                expected_columns = [
                    "Username",
                    "Task Name",
                    "Time",
                    "Date",
                    "URL",
                    "Citation (Start of Text)",
                    "Citation (End of Text)",
                    "SCC Index",
                    "Annotated Text",
                ]

                if not all(col in df.columns for col in expected_columns):
                    st.error(
                        "The uploaded Excel file does not contain the required columns: "
                        + ", ".join(expected_columns)
                    )
                elif df.empty:
                    # Nothing to verify: report it up front instead of first
                    # rendering an empty table and a download link.
                    st.error("No valid data found in the Excel file.")
                else:
                    results = []
                    for row_idx, (_, row) in enumerate(df.iterrows()):
                        # Normalise every expected cell to text; blank cells
                        # (NaN after read_excel) become "".
                        cells = {
                            col: str(row[col]) if pd.notna(row[col]) else ""
                            for col in expected_columns
                        }
                        username = cells["Username"]
                        base_url = cells["URL"]
                        annotated_text = cells["Annotated Text"]
                        hash_text = cells["SCC Index"]
                        # Named *_text so the locals cannot shadow any
                        # `time` / `date` names already in scope.
                        time_text = cells["Time"]
                        date_text = cells["Date"]

                        # Either citation column may carry the
                        # "Author (Year)" text; prefer the start-of-text one.
                        citation_text = (
                            cells["Citation (Start of Text)"]
                            or cells["Citation (End of Text)"]
                        )

                        status = "Unauthenticated"
                        author = year = None

                        required_values = [
                            citation_text,
                            hash_text,
                            base_url,
                            annotated_text,
                            username,
                            cells["Task Name"],
                            date_text,
                            time_text,
                        ]
                        if all(required_values):
                            author, year = parse_citation_text(citation_text)
                            scc_hash = parse_hash_text(hash_text)

                            if all([author, year, scc_hash]):
                                # Strip the fragment and the task name before
                                # hashing, exactly as the single-citation
                                # check does, so stray cell whitespace does
                                # not cause a false mismatch.
                                citation_fragment = annotated_text.strip()
                                task_name = cells["Task Name"].strip()

                                if len(citation_fragment) < 20:
                                    # +2: one for the header row, one because
                                    # spreadsheet rows are 1-based.
                                    st.markdown("""
                                    <div class="warning-box">
                                        <strong>Warning:</strong> The citation text fragment in row {} may be truncated, which could cause verification to fail.
                                    </div>
                                    """.format(row_idx + 2), unsafe_allow_html=True)

                                selected_citation_fragment = select_longest_segment(citation_fragment)
                                # The selected fragment fills both fragment
                                # arguments, as in the single-citation check.
                                recomputed_hash = generate_citation_hash(
                                    author,
                                    year,
                                    base_url,
                                    selected_citation_fragment,
                                    selected_citation_fragment,
                                    username,
                                    task_name,
                                    date_text,
                                    time_text,
                                )
                                if recomputed_hash == scc_hash:
                                    status = "Authenticated"

                        # One result per spreadsheet row, verified or not.
                        results.append({
                            "Username": username,
                            "Task Name": cells["Task Name"],
                            "Time": time_text,
                            "Date": date_text,
                            "URL": base_url or "N/A",
                            "Author(s) Name": author or "N/A",
                            "Year": year or "N/A",
                            "Annotated Text": annotated_text,
                            "Status": status,
                        })

                    results_df = pd.DataFrame(results)

                    st.markdown("### Citation Verification Results")
                    st.markdown(
                        results_df.to_html(classes="citations-table", index=False),
                        unsafe_allow_html=True,
                    )
                    st.markdown(
                        get_table_download_link(results_df, "verified_citation_data.csv"),
                        unsafe_allow_html=True,
                    )

                    st.markdown("""
                    <div class="success-box">
                        <strong>Citations processed successfully!</strong>
                    </div>
                    """, unsafe_allow_html=True)

            except Exception as e:
                st.error(f"Error processing Excel file: {e}")
712
+
713
  st.markdown('</div>', unsafe_allow_html=True)
714
 
715
  # Footer