Spaces:
Running
Running
| import streamlit as st | |
| import streamlit.components.v1 as components | |
| import hashlib | |
| import urllib.parse | |
| from datetime import datetime | |
| import pytz | |
| import pandas as pd | |
| import re | |
| # --- Constants --- | |
# IANA zone name handed to pytz when timestamping generated citations.
MELBOURNE_TIMEZONE = "Australia/Melbourne"
| # --- Custom CSS for simplified UI --- | |
def load_css():
    """Inject the app's custom stylesheet into the page.

    The rules are sent as a raw ``<style>`` element through ``st.markdown``
    with ``unsafe_allow_html=True``.
    """
    stylesheet = """
<style>
.main-header {
padding: 2rem;
text-align: center;
margin-bottom: 2rem;
}
.citation-output {
background: #f8f8f8;
border: 1px solid #e0e0e0;
border-radius: 4px;
padding: 1rem;
margin: 1rem 0;
font-family: 'Courier New', monospace;
}
.warning-box {
background: #f8f8f8;
border: 1px solid #e0e0e0;
border-radius: 4px;
padding: 1rem;
margin: 1rem 0;
}
.success-box {
background: #f8f8f8;
border: 1px solid #e0e0e0;
border-radius: 4px;
padding: 1rem;
margin: 1rem 0;
}
.info-card {
background: white;
border-radius: 4px;
padding: 1.5rem;
margin: 1rem 0;
border-left: 1px solid #e0e0e0;
}
.footer {
text-align: center;
padding: 2rem;
margin-top: 2rem;
border-top: 1px solid #e0e0e0;
font-size: 0.9rem;
}
.hash-display {
background: #f8f8f8;
border: 1px solid #e0e0e0;
border-radius: 4px;
padding: 1rem;
font-family: 'Courier New', monospace;
font-size: 0.85rem;
word-break: break-all;
margin: 0.5rem 0;
}
.tab-content {
padding: 2rem 0;
}
.datetime-display {
background: #f8f8f8;
border-radius: 4px;
padding: 0.8rem;
margin: 0.5rem 0;
border-left: 1px solid #e0e0e0;
}
.verification-table {
margin: 1rem 0;
border-radius: 4px;
overflow: hidden;
}
.rendered-citation {
margin: 1rem 0;
font-size: 1rem;
}
</style>
"""
    st.markdown(stylesheet, unsafe_allow_html=True)
| # --- Helper Functions --- | |
def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
    """Return the SHA-256 hex digest that fingerprints one citation.

    The author and year are joined with ``", "``; that pair and every other
    field are then joined with ``" | "``, UTF-8 encoded and hashed.

    Returns:
        The digest as a 64-character lowercase hexadecimal string.
    """
    fields = (
        f"{author}, {year}",
        url,
        fragment_text,
        cited_text,
        username,
        task_name,
        current_date,
        current_time,
    )
    payload = " | ".join(str(field) for field in fields)
    digest = hashlib.sha256()
    digest.update(payload.encode("utf-8"))
    return digest.hexdigest()
def format_citation_html(url, fragment_text, author, year, scc_hash):
    """Build the inline ``Author (Year)`` citation link.

    The link points at ``url`` with a ``#:~:text=`` text-fragment directive
    for ``fragment_text`` and carries ``scc_hash`` in a ``data-hash``
    attribute.

    Args:
        url: Source URL; expected to have no fragment of its own.
        fragment_text: Quoted text to highlight on the source page.
        author: Author label shown in the link text.
        year: Publication year shown in the link text.
        scc_hash: Hex digest stored in the ``data-hash`` attribute.

    Returns:
        The ``<a>`` element as an HTML string.
    """
    # quote() leaves "-" untouched, but "-" is syntax inside a text directive
    # (it marks "prefix-" and "-suffix"), so it has to be percent-encoded as
    # well. unquote() turns "%2D" back into "-", so parsing is unaffected.
    encoded_fragment = urllib.parse.quote(fragment_text).replace('-', '%2D')
    full_url = f"{url}#:~:text={encoded_fragment}"
    # NOTE(review): url, author, year and scc_hash are interpolated as-is,
    # with no HTML escaping.
    return f'<a href="{full_url}" data-hash="{scc_hash}">{author} ({year})</a>'
def format_metadata_html(url, author, year, scc_hash, username, task_name, current_date, current_time):
    """Build the ledger link that carries the citation's provenance metadata.

    Username, task name, date and time are joined with em dashes and stored,
    percent-encoded, in the link's ``#:~:text=`` directive. The visible text
    is ``Author (Year). <hash>`` and the hash is repeated in ``data-hash``.

    Args:
        url: Source URL the ledger entry refers to.
        author: Author label shown in the link text.
        year: Publication year shown in the link text.
        scc_hash: Hex digest shown in the text and stored in ``data-hash``.
        username: Name of the person who generated the citation.
        task_name: Task or project the citation was generated for.
        current_date: Generation date string.
        current_time: Generation time string.

    Returns:
        The ``<a>`` element as an HTML string.
    """
    metadata = f"{username}—{task_name}—{current_date}—{current_time}"
    # quote() leaves "-" untouched, but "-" is syntax inside a text directive,
    # so it is percent-encoded too. unquote() restores it when parsing.
    encoded_metadata = urllib.parse.quote(metadata).replace('-', '%2D')
    full_url = f"{url}#:~:text={encoded_metadata}"
    return f'<a href="{full_url}" data-hash="{scc_hash}">{author} ({year}). {scc_hash}</a>'
def check_for_fragment(url):
    """Return True when ``url`` already carries a text-fragment directive.

    Besides the plain ``#:~:text=`` form, this also recognises a directive
    that follows an element fragment (``#intro:~:text=...``) or another
    directive (``#:~:foo=1&text=...``).

    Args:
        url: The URL to inspect; ``None`` or an empty string yields False.

    Returns:
        True if a ``text=`` directive appears after the ``:~:`` delimiter in
        the URL's fragment, otherwise False.
    """
    if not url:
        return False
    # "#", anything, ":~:", then "text=" either immediately or after an
    # "&"-separated earlier directive.
    return re.search(r'#.*:~:(?:.*&)?text=', url) is not None
def parse_citation(citation_html):
    """Extract the citation fields from a generated citation ``<a>`` element.

    Both link styles are accepted: narrative ``Author (Year)`` and
    parenthetical ``(Author, Year)``. Surrounding whitespace is ignored.

    Args:
        citation_html: HTML of the citation link; may be empty or ``None``.

    Returns:
        ``(author, year, url, fragment_text, scc_hash)`` on success, where
        ``fragment_text`` is percent-decoded. A tuple of five ``None`` values
        is returned when the input does not match the expected markup.
    """
    no_match = (None, None, None, None, None)
    if not citation_html:
        return no_match

    match = re.match(
        r'<a href="([^"]+)#:~:text=([^"]+)" data-hash="([^"]+)">'
        r'(?:([^<]+) \((\d{4})\)|\(([^<]+), (\d{4})\))</a>',
        citation_html.strip(),
    )
    if not match:
        return no_match

    url, encoded_fragment, scc_hash, author, year, paren_author, paren_year = match.groups()
    if author is None:
        # The parenthetical "(Author, Year)" alternative matched.
        author, year = paren_author, paren_year
    fragment_text = urllib.parse.unquote(encoded_fragment)
    return author, year, url, fragment_text, scc_hash
def parse_metadata_hash(metadata_html):
    """Extract the hash and provenance metadata from a ledger ``<a>`` element.

    The element is expected to look like the output of the ledger formatter:
    the ``#:~:text=`` directive holds ``username—task—date—time`` (em-dash
    separated, percent-encoded) and the hash appears both in ``data-hash``
    and at the end of the link text.

    Args:
        metadata_html: HTML of the ledger link; may be empty or ``None``.

    Returns:
        ``(scc_hash, username, task_name, date, time)`` on success. A tuple
        of five ``None`` values is returned when the markup does not match,
        when the displayed hash differs from ``data-hash``, or when the
        metadata does not split into exactly four parts.
    """
    no_match = (None, None, None, None, None)
    if not metadata_html:
        return no_match

    match = re.match(
        r'<a href="([^"]+)#:~:text=([^"]+)" data-hash="([^"]+)">[^<]+\(\d{4}\)\. ([^<]+)</a>',
        metadata_html.strip(),
    )
    if not match:
        return no_match

    _url, encoded_metadata, scc_hash, displayed_hash = match.groups()
    # The generator writes the same digest in both places; a mismatch means
    # one of the two copies was edited after generation.
    if displayed_hash.strip() != scc_hash:
        return no_match

    metadata_parts = urllib.parse.unquote(encoded_metadata).split('—')
    if len(metadata_parts) != 4:
        return no_match

    username, task_name, date, time = metadata_parts
    return scc_hash, username, task_name, date, time
| # --- JavaScript for extracting link attributes --- | |
def verification_js():
    """Return the ``<script>`` snippet that mirrors pasted links into outputs.

    The script looks up the ``citation_input`` / ``hash_input`` elements and,
    on paste or input, copies the ``outerHTML`` of the first ``<a>`` inside
    each one into ``citation_output`` / ``hash_output``.
    """
    script = """
<script>
function updateOutputs() {
const citationInput = document.getElementById('citation_input');
const hashInput = document.getElementById('hash_input');
const citationOutput = document.getElementById('citation_output');
const hashOutput = document.getElementById('hash_output');
if (citationInput && hashInput && citationOutput && hashOutput) {
const citationLink = citationInput.getElementsByTagName('a')[0];
const hashLink = hashInput.getElementsByTagName('a')[0];
citationOutput.value = citationLink ? citationLink.outerHTML : '';
hashOutput.value = hashLink ? hashLink.outerHTML : '';
}
}
document.addEventListener('DOMContentLoaded', function() {
const citationInput = document.getElementById('citation_input');
const hashInput = document.getElementById('hash_input');
if (citationInput) {
citationInput.addEventListener('paste', function() {
setTimeout(updateOutputs, 100); // Delay to ensure paste is complete
});
citationInput.addEventListener('input', updateOutputs);
}
if (hashInput) {
hashInput.addEventListener('paste', function() {
setTimeout(updateOutputs, 100); // Delay to ensure paste is complete
});
hashInput.addEventListener('input', updateOutputs);
}
});
</script>
"""
    return script
| # --- Live Clock JavaScript --- | |
def live_clock():
    """Return the HTML and JavaScript for a ticking Melbourne-time clock.

    The snippet renders a ``datetime-display`` box and refreshes its text
    once per second as ``YYYY-MM-DD HH:MM:SS`` in the ``Australia/Melbourne``
    time zone.

    Returns:
        The markup and script as a single string.
    """
    # Date parts are looked up by their `type` rather than by position,
    # because the order and number of entries returned by formatToParts()
    # depend on the locale and the engine.
    return """
<div class="datetime-display">
    <span id="live_datetime"></span>
</div>
<script>
    function updateClock() {
        const options = {
            timeZone: 'Australia/Melbourne',
            year: 'numeric',
            month: '2-digit',
            day: '2-digit',
            hour: '2-digit',
            minute: '2-digit',
            second: '2-digit',
            hour12: false
        };
        const formatter = new Intl.DateTimeFormat('en-AU', options);
        const fields = {};
        for (const part of formatter.formatToParts(new Date())) {
            fields[part.type] = part.value;
        }
        const date = `${fields.year}-${fields.month}-${fields.day}`;
        const time = `${fields.hour}:${fields.minute}:${fields.second}`;
        const datetimeElement = document.getElementById('live_datetime');
        if (datetimeElement) {
            datetimeElement.innerText = `${date} ${time}`;
        }
    }
    updateClock();
    setInterval(updateClock, 1000);
</script>
"""
| # --- Streamlit App --- | |
# Static HTML rendered at the top of the page.
MAIN_HEADER_HTML = """
<div class="main-header">
<h1>Smart Context Citation (SCC) Tool</h1>
<p>Next-generation digital referencing system for the age of Generative AI</p>
</div>
"""

ABOUT_AND_EXAMPLE_HTML = """
<div class="info-card">
<h3>About SCC</h3>
The Smart Context Citation (SCC) style is a next-generation digital referencing system designed for the age of Generative AI. It embeds citation context directly in the document, uses cryptographic hash signatures for integrity, and eliminates traditional reference lists.
<strong>Purpose:</strong> Transparency, integrity, and digital fluency in citations.
<strong>Structure:</strong>
- Inline general author name and date style citation
- Hyperlinked URL with text fragment (#:~:text=)
- SHA-256 hash for verification
<strong>Benefits:</strong> Enhances fairness, integrates with source contexts, promotes digital fluency, prevents fabrication, and eliminates traditional reference lists.
<strong>Technical Legitimacy:</strong> Referencing the <a href="https://wicg.github.io/scroll-to-text-fragment/" target="_blank">Text Fragments WICG specification</a> for technical legitimacy.
</div>
<div class="info-card">
<h3>Example Citation</h3>
<strong>Input:</strong><br>
- Author: <code>Abuseif et al.</code><br>
- Year: <code>2025</code><br>
- URL: <code>https://www.sciencedirect.com/science/article/pii/S2772411523000046</code><br>
- Text: <code>A proposed design framework for green roof settings in general and trees on buildings</code><br>
<strong>Output (Start of Text):</strong><br>
<div class="rendered-citation">
Abuseif et al. (2025)
</div>
<strong>Output (End of Text):</strong><br>
<div class="rendered-citation">
(Abuseif et al., 2025)
</div>
</div>
"""

# Page configuration is issued before any other Streamlit call.
st.set_page_config(layout="wide", page_title="Smart Context Citation Tool")

# Custom stylesheet, then the page title banner.
load_css()
st.markdown(MAIN_HEADER_HTML, unsafe_allow_html=True)

# Collapsible "About" card and worked example.
with st.expander("About SCC and Example Citation"):
    st.markdown(ABOUT_AND_EXAMPLE_HTML, unsafe_allow_html=True)
# Static HTML snippets used by the two tabs below.
FRAGMENT_WARNING_HTML = """
<div class="warning-box">
<strong>Warning:</strong> It seems like your URL already contains a text fragment (<code>#:~:text=</code>).
This suggests you may have used AI assistance in generating this link. Please go back to the original source,
read the context carefully, and copy the source link again without any existing fragment.
</div>
"""

VERIFY_INSTRUCTIONS_HTML = """
<div class="info-card">
Paste the HTML of the generated citation and of its SCC ledger entry below to verify the citation's authenticity. Copy each snippet from the code box shown under the generated output.
</div>
"""

VERIFY_SUCCESS_HTML = """
<div class="success-box">
<strong>Hash verified successfully!</strong> The citation is authentic and hasn't been tampered with.
</div>
"""

VERIFY_FAILURE_HTML = """
<div class="warning-box">
<strong>Hash verification failed!</strong> The citation may have been altered or is not authentic.
</div>
"""

tabs = st.tabs(["Citation Generator", "Verify Citation"])

# ---------------------------------------------------------------------------
# Tab 1: generate a citation, its end-of-text variant and the ledger entry.
# ---------------------------------------------------------------------------
with tabs[0]:
    # NOTE(review): each st.markdown call is rendered as its own element, so
    # this div presumably cannot enclose the widgets that follow; it is kept
    # unchanged for layout parity.
    st.markdown('<div class="tab-content">', unsafe_allow_html=True)
    st.header("Generate New Citation")

    # User Information Section
    st.subheader("User Information")
    col1, col2 = st.columns(2)
    with col1:
        username = st.text_input("Username", help="Your username for tracking purposes", placeholder="e.g., john_doe")
    with col2:
        task_name = st.text_input("Task Name", help="The name of the task or project", placeholder="e.g., Literature Review Assignment")

    # Citation Info Section
    st.subheader("Citation Info")
    col3, col4 = st.columns(2)
    with col3:
        author_name = st.text_input("Author(s) Name", help="The author(s) of the source", placeholder="e.g., Smith or Smith et al.")
    with col4:
        publication_year = st.text_input("Publication Year", help="The year of publication", placeholder="e.g., 2023")
    col5, col6 = st.columns(2)
    with col5:
        source_url = st.text_input("Source URL", help="The full URL of the source", placeholder="https://example.com/article")
    with col6:
        annotated_text = st.text_input("Annotated Text", help="The text quoted or paraphrased from the source", placeholder="e.g., Thermal comfort thresholds...")

    # Live date and time display
    st.markdown("### Current Date and Time")
    components.html(live_clock(), height=50)

    # Server-side Melbourne timestamp that is baked into the hash.
    melbourne_tz = pytz.timezone(MELBOURNE_TIMEZONE)
    current_datetime_melbourne = datetime.now(melbourne_tz)
    current_date = current_datetime_melbourne.strftime("%Y-%m-%d")
    current_time = current_datetime_melbourne.strftime("%H:%M:%S")

    generate_button = st.button("Generate Citation", type="primary", use_container_width=True)
    if generate_button:
        if not all([username, task_name, author_name, publication_year, source_url, annotated_text]):
            st.error("Please fill in all fields before generating a citation.")
        elif check_for_fragment(source_url):
            st.markdown(FRAGMENT_WARNING_HTML, unsafe_allow_html=True)
        elif not re.fullmatch(r"\d{4}", publication_year):
            # The citation parser only recognises a four-digit year, so any
            # other value would produce a citation that can never verify.
            st.error("Publication Year must be a four-digit year (e.g., 2023) so the citation can be verified later.")
        elif "—" in username or "—" in task_name:
            # The ledger joins its fields with em dashes and is split on them
            # again during verification.
            st.error("Username and Task Name cannot contain an em dash (—).")
        else:
            scc_hash = generate_citation_hash(author_name, publication_year, source_url, annotated_text, annotated_text, username, task_name, current_date, current_time)
            citation_link_start = format_citation_html(source_url, annotated_text, author_name, publication_year, scc_hash)

            # Build the parenthetical variant from the narrative link so both
            # always share the same href and data-hash.
            narrative_label = f"{author_name} ({publication_year})</a>"
            parenthetical_label = f"({author_name}, {publication_year})</a>"
            if citation_link_start.endswith(narrative_label):
                citation_link_end = citation_link_start[:-len(narrative_label)] + parenthetical_label
            else:
                citation_link_end = f'<a href="{source_url}#:~:text={urllib.parse.quote(annotated_text)}" data-hash="{scc_hash}">{parenthetical_label}'

            metadata_link = format_metadata_html(source_url, author_name, publication_year, scc_hash, username, task_name, current_date, current_time)

            st.markdown("## Generated Citations")
            col_html1, col_html2 = st.columns(2)

            # Each link is rendered inside its wrapper in a single call so the
            # wrapper's CSS class applies; the code box exposes the exact HTML
            # that the Verify tab expects.
            with col_html1:
                st.markdown("### Citation (Start of Text)")
                st.markdown(f'<div class="rendered-citation">{citation_link_start}</div>', unsafe_allow_html=True)
                st.code(citation_link_start, language="html")
            with col_html2:
                st.markdown("### Citation (End of Text)")
                st.markdown(f'<div class="rendered-citation">{citation_link_end}</div>', unsafe_allow_html=True)
                st.code(citation_link_end, language="html")

            # SCC Ledger
            st.markdown("### SCC Ledger")
            st.markdown(f'<div class="hash-display">{metadata_link}</div>', unsafe_allow_html=True)
            st.code(metadata_link, language="html")
    st.markdown('</div>', unsafe_allow_html=True)

# ---------------------------------------------------------------------------
# Tab 2: verify a citation against its ledger entry.
# ---------------------------------------------------------------------------
with tabs[1]:
    st.markdown('<div class="tab-content">', unsafe_allow_html=True)
    st.header("Verify Citation")
    st.markdown(VERIFY_INSTRUCTIONS_HTML, unsafe_allow_html=True)

    # Plain Streamlit widgets are used so the pasted HTML is returned to
    # Python on rerun.
    citation_html = (st.text_area(
        "Paste Citation (with embedded link)",
        key="citation_html",
        help="The <a ...>...</a> HTML shown in the code box under a generated citation.",
    ) or "").strip()
    hash_html = (st.text_area(
        "Paste Hash (with embedded link)",
        key="hash_html",
        help="The <a ...>...</a> HTML shown in the code box under the SCC Ledger entry.",
    ) or "").strip()

    verify_button = st.button("Verify Citation", type="primary", use_container_width=True)
    if verify_button:
        if not (citation_html and hash_html):
            st.error("Please paste both the citation and hash links before verifying.")
        else:
            # Parse citation
            author, year, url, fragment_text, citation_hash = parse_citation(citation_html)
            # Parse hash and metadata
            scc_hash, username, task_name, date, time = parse_metadata_hash(hash_html)
            if not all([author, year, url, fragment_text, scc_hash, username, task_name, date, time]):
                st.error("Invalid citation or hash format. Please ensure both inputs are correctly pasted links from the generated output.")
            else:
                # Recompute the digest from the parsed fields; it must equal
                # the hash embedded in the ledger and in the citation.
                recomputed_hash = generate_citation_hash(
                    author, year, url, fragment_text, fragment_text, username, task_name, date, time
                )
                if recomputed_hash == scc_hash and citation_hash == scc_hash:
                    st.markdown(VERIFY_SUCCESS_HTML, unsafe_allow_html=True)
                else:
                    st.markdown(VERIFY_FAILURE_HTML, unsafe_allow_html=True)
    st.markdown('</div>', unsafe_allow_html=True)
| # Footer | |
# Attribution block shown at the bottom of every page.
FOOTER_HTML = """
<div class="footer">
Developed by: Dr Majed Abuseif<br>
School of Architecture and Built Environment<br>
Deakin University<br>
© 2025
</div>
"""
st.markdown(FOOTER_HTML, unsafe_allow_html=True)