"""Streamlit app that generates and verifies Smart Context Citations (SCC).

A citation is rendered as an HTML link whose URL carries a text fragment
(``#:~:text=``) and whose ``data-hash`` attribute carries a SHA-256 digest of
the citation fields. The "Verify Citation" tab parses those links back into
their fields and recomputes the digest.
"""
import hashlib
import html
import re
import urllib.parse
from datetime import datetime

import pandas as pd
import pytz
import streamlit as st
import streamlit.components.v1 as components

# --- Constants ---
MELBOURNE_TIMEZONE = 'Australia/Melbourne'

# Separator between a URL and its text directive.
_TEXT_DIRECTIVE = '#:~:text='

# Shape of the anchors emitted by format_citation_html / format_metadata_html.
# The parsers below rely on these patterns staying in sync with the formatters.
_CITATION_LINK_RE = re.compile(
    r'<a href="([^"]+)" data-hash="([0-9a-f]{64})"[^>]*>([^<]+) \((\d{4})\)</a>'
)
_LEDGER_LINK_RE = re.compile(
    r'<a href="([^"]+)" data-hash="([0-9a-f]{64})"[^>]*>'
    r'[^<]+ \(\d{4}\)\. ([0-9a-f]{64})</a>'
)

_PARSE_FAILURE = (None, None, None, None, None)


# --- Custom CSS for simplified UI ---
def load_css():
    """Inject the app's custom CSS via ``st.markdown``."""
    st.markdown(""" """, unsafe_allow_html=True)


# --- Helper Functions ---
def generate_citation_hash(author, year, url, fragment_text, cited_text,
                           username, task_name, current_date, current_time):
    """Return the SHA-256 hex digest of the citation fields.

    The fields are joined into one string (author and year separated by
    ``", "``, the rest by ``" | "``) and encoded as UTF-8 before hashing.
    """
    data = (
        f"{author}, {year} | {url} | {fragment_text} | {cited_text} | "
        f"{username} | {task_name} | {current_date} | {current_time}"
    )
    return hashlib.sha256(data.encode('utf-8')).hexdigest()


def _encode_text_directive(text):
    """Percent-encode *text* for use after ``#:~:text=``.

    ``urllib.parse.quote`` leaves ``-`` untouched, but a dash is syntax inside
    a text directive (prefix/suffix markers), so it is encoded explicitly.
    """
    return urllib.parse.quote(text).replace('-', '%2D')


def _build_link(url, directive_text, scc_hash, label):
    """Return an ``<a>`` element linking to *url* with a text directive.

    Every interpolated value is HTML-escaped because the result is rendered
    with ``unsafe_allow_html=True`` and contains user-supplied text.
    """
    href = f"{url}{_TEXT_DIRECTIVE}{_encode_text_directive(directive_text)}"
    return (
        f'<a href="{html.escape(href)}" data-hash="{html.escape(scc_hash)}">'
        f'{html.escape(label)}</a>'
    )


def format_citation_html(url, fragment_text, author, year, scc_hash):
    """Return the inline citation link, labelled ``author (year)``.

    The link targets *url* with *fragment_text* as its text fragment and
    stores *scc_hash* in the ``data-hash`` attribute.
    """
    return _build_link(url, fragment_text, scc_hash, f'{author} ({year})')


def format_metadata_html(url, author, year, scc_hash, username, task_name,
                         current_date, current_time):
    """Return the ledger link, labelled ``author (year). scc_hash``.

    The text fragment of the link carries the em-dash-separated metadata
    ``username—task_name—current_date—current_time``.
    """
    metadata = f"{username}—{task_name}—{current_date}—{current_time}"
    return _build_link(url, metadata, scc_hash, f'{author} ({year}). {scc_hash}')


def check_for_fragment(url):
    """Return True when *url* already contains a ``#:~:text=`` directive."""
    return _TEXT_DIRECTIVE in url


def _split_href(href):
    """Split an escaped ``href`` into ``(url, decoded_directive_text)``.

    Returns ``(None, None)`` when the href has no text directive.
    """
    url, marker, encoded = html.unescape(href).partition(_TEXT_DIRECTIVE)
    if not marker:
        return None, None
    return url, urllib.parse.unquote(encoded)


def parse_citation(citation_html):
    """Parse a citation link produced by ``format_citation_html``.

    Returns ``(author, year, url, fragment_text, scc_hash)``, or a tuple of
    five ``None`` values when the input does not match the expected format.
    """
    match = _CITATION_LINK_RE.search(citation_html or '')
    if not match:
        return _PARSE_FAILURE
    href, scc_hash, author, year = match.groups()
    url, fragment_text = _split_href(href)
    if url is None:
        return _PARSE_FAILURE
    return html.unescape(author), year, url, fragment_text, scc_hash


def parse_metadata_hash(metadata_html):
    """Parse a ledger link produced by ``format_metadata_html``.

    Returns ``(scc_hash, username, task_name, date, time)``, or a tuple of
    five ``None`` values when the input does not match the expected format,
    when the hash in the attribute differs from the hash in the label, or
    when the metadata does not split into exactly four parts.
    """
    match = _LEDGER_LINK_RE.search(metadata_html or '')
    if not match:
        return _PARSE_FAILURE
    href, scc_hash, same_hash = match.groups()
    if scc_hash != same_hash:
        return _PARSE_FAILURE
    url, metadata = _split_href(href)
    if url is None:
        return _PARSE_FAILURE
    metadata_parts = metadata.split('—')
    if len(metadata_parts) != 4:
        return _PARSE_FAILURE
    username, task_name, date, time = metadata_parts
    return scc_hash, username, task_name, date, time


# --- JavaScript for extracting link attributes ---
def verification_js():
    """Return the HTML/JS snippet embedded in the verification tab."""
    return """ """


# --- Live Clock JavaScript ---
def live_clock():
    """Return the HTML/JS snippet embedded as the live clock."""
    return """
"""


# --- Streamlit App ---
st.set_page_config(layout="wide", page_title="Smart Context Citation Tool")

# Load custom CSS
load_css()

# Main header
st.markdown("""

Smart Context Citation (SCC) Tool

Next-generation digital referencing system for the age of Generative AI

""", unsafe_allow_html=True)

# Expandable section for About and Example
with st.expander("About SCC and Example Citation"):
    st.markdown("""

About SCC

The Smart Context Citation (SCC) style is a next-generation digital referencing system designed for the age of Generative AI. It embeds citation context directly in the document, uses cryptographic hash signatures for integrity, and eliminates traditional reference lists. Purpose: Transparency, integrity, and digital fluency in citations. Structure: - Inline general author name and date style citation - Hyperlinked URL with text fragment (#:~:text=) - SHA-256 hash for verification Benefits: Enhances fairness, integrates with source contexts, promotes digital fluency, prevents fabrication, and eliminates traditional reference lists. Technical Legitimacy: Referencing the Text Fragments WICG specification for technical legitimacy.

Example Citation

Input:
- Author: Abuseif et al.
- Year: 2025
- URL: https://www.sciencedirect.com/science/article/pii/S2772411523000046
- Text: A proposed design framework for green roof settings in general and trees on buildings
Output (Start of Text):
Abuseif et al. (2025)
Output (End of Text):
(Abuseif et al., 2025)
""", unsafe_allow_html=True)

tabs = st.tabs(["Citation Generator", "Verify Citation"])

with tabs[0]:
    # Newline-only markdown elements are kept from the original layout.
    st.markdown("\n", unsafe_allow_html=True)
    st.header("Generate New Citation")

    # User Information Section
    st.subheader("User Information")
    col1, col2 = st.columns(2)
    with col1:
        username = st.text_input(
            "Username",
            help="Your username for tracking purposes",
            placeholder="e.g., john_doe",
        )
    with col2:
        task_name = st.text_input(
            "Task Name",
            help="The name of the task or project",
            placeholder="e.g., Literature Review Assignment",
        )

    # Citation Info Section
    st.subheader("Citation Info")
    col3, col4 = st.columns(2)
    with col3:
        author_name = st.text_input(
            "Author(s) Name",
            help="The author(s) of the source",
            placeholder="e.g., Smith or Smith et al.",
        )
    with col4:
        publication_year = st.text_input(
            "Publication Year",
            help="The year of publication",
            placeholder="e.g., 2023",
        )

    col5, col6 = st.columns(2)
    with col5:
        source_url = st.text_input(
            "Source URL",
            help="The full URL of the source",
            placeholder="https://example.com/article",
        )
    with col6:
        annotated_text = st.text_input(
            "Annotated Text",
            help="The text quoted or paraphrased from the source",
            placeholder="e.g., Thermal comfort thresholds...",
        )

    # Live date and time display
    st.markdown("### Current Date and Time")
    components.html(live_clock(), height=50)

    # Get current date and time in Melbourne timezone for hash generation
    melbourne_tz = pytz.timezone(MELBOURNE_TIMEZONE)
    current_datetime_melbourne = datetime.now(melbourne_tz)
    current_date = current_datetime_melbourne.strftime("%Y-%m-%d")
    current_time = current_datetime_melbourne.strftime("%H:%M:%S")

    generate_button = st.button(
        "Generate Citation", type="primary", use_container_width=True
    )

    if generate_button:
        if not all([username, task_name, author_name, publication_year,
                    source_url, annotated_text]):
            st.error("Please fill in all fields before generating a citation.")
        elif not re.fullmatch(r"\d{4}", publication_year):
            # The parsers only recognise four-digit years, so any other value
            # would produce a citation that can never be verified.
            st.error("Publication Year must be a four-digit year (e.g., 2023).")
        elif check_for_fragment(source_url):
            st.markdown("""
Warning: It seems like your URL already contains a text fragment (#:~:text=). This suggests you may have used AI assistance in generating this link. Please go back to the original source, read the context carefully, and copy the source link again without any existing fragment.
""", unsafe_allow_html=True)
        else:
            scc_hash = generate_citation_hash(
                author_name, publication_year, source_url, annotated_text,
                annotated_text, username, task_name, current_date, current_time,
            )
            citation_link_start = format_citation_html(
                source_url, annotated_text, author_name, publication_year,
                scc_hash,
            )
            # Escaped because it is rendered with unsafe_allow_html=True.
            citation_link_end = (
                f'({html.escape(author_name)}, {html.escape(publication_year)})'
            )
            metadata_link = format_metadata_html(
                source_url, author_name, publication_year, scc_hash,
                username, task_name, current_date, current_time,
            )

            st.markdown("## Generated Citations")
            col_html1, col_html2 = st.columns(2)

            # HTML Citation - Start of Text
            with col_html1:
                st.markdown("### Citation (Start of Text)")
                st.markdown("\n", unsafe_allow_html=True)
                st.markdown(citation_link_start, unsafe_allow_html=True)
                # Raw HTML so the link can be pasted intact into the verifier.
                st.code(citation_link_start, language="html")
                st.markdown("\n", unsafe_allow_html=True)

            # HTML Citation - End of Text
            with col_html2:
                st.markdown("### Citation (End of Text)")
                st.markdown("\n", unsafe_allow_html=True)
                st.markdown(citation_link_end, unsafe_allow_html=True)
                st.markdown("\n", unsafe_allow_html=True)

            # SCC Ledger
            st.markdown("### SCC Ledger")
            st.markdown("\n", unsafe_allow_html=True)
            st.markdown(metadata_link, unsafe_allow_html=True)
            st.code(metadata_link, language="html")
            st.markdown("\n", unsafe_allow_html=True)

    st.markdown("\n", unsafe_allow_html=True)

with tabs[1]:
    st.markdown("\n", unsafe_allow_html=True)
    st.header("Verify Citation")
    st.markdown("""
Paste the generated citation and hash with their embedded links below to verify the citation's authenticity. Copy the HTML source shown under each generated link so the embedded link survives the paste.
""", unsafe_allow_html=True)

    # Initialize session state for storing extracted data
    if 'citation_html' not in st.session_state:
        st.session_state.citation_html = ''
    if 'hash_html' not in st.session_state:
        st.session_state.hash_html = ''

    # Input fields for citation and hash. They are bound to the session-state
    # keys initialised above; an embedded components.html snippet cannot write
    # values back into st.session_state.
    citation_html = st.text_area(
        "Citation link (HTML)",
        key='citation_html',
        help="The HTML shown under 'Citation (Start of Text)'",
    )
    hash_html = st.text_area(
        "SCC Ledger link (HTML)",
        key='hash_html',
        help="The HTML shown under 'SCC Ledger'",
    )

    # Add JavaScript to capture pasted link data
    components.html(verification_js(), height=0)

    verify_button = st.button(
        "Verify Citation", type="primary", use_container_width=True
    )

    if verify_button:
        if not (citation_html and hash_html):
            st.error("Please paste both the citation and hash links before verifying.")
        else:
            # Parse citation
            author, year, url, fragment_text, citation_hash = parse_citation(
                citation_html
            )
            # Parse hash and metadata
            scc_hash, username, task_name, cited_date, cited_time = (
                parse_metadata_hash(hash_html)
            )

            if not all([author, year, url, fragment_text, scc_hash, username,
                        task_name, cited_date, cited_time]):
                st.error("Invalid citation or hash format. Please ensure both inputs are correctly pasted links from the generated output.")
            else:
                # Recompute hash
                recomputed_hash = generate_citation_hash(
                    author, year, url, fragment_text, fragment_text,
                    username, task_name, cited_date, cited_time,
                )
                # The citation link and the ledger link must carry the same
                # hash, and it must match the digest of the parsed fields.
                if recomputed_hash == scc_hash == citation_hash:
                    st.markdown("""
Hash verified successfully! The citation is authentic and hasn't been tampered with.
""", unsafe_allow_html=True)
                else:
                    st.markdown("""
Hash verification failed! The citation may have been altered or is not authentic.
""", unsafe_allow_html=True)

    st.markdown("\n", unsafe_allow_html=True)

# Footer
st.markdown(""" """, unsafe_allow_html=True)