SCC / app.py
mabuseif's picture
Update app.py
dfd2cfc verified
raw
history blame
16.9 kB
import streamlit as st
import streamlit.components.v1 as components
import hashlib
import urllib.parse
from datetime import datetime
import pytz
import pandas as pd
import re
# --- Constants ---
# Time zone name handed to pytz.timezone() when the generator tab stamps a
# citation with the current date and time.
MELBOURNE_TIMEZONE = 'Australia/Melbourne'
# --- Custom CSS for simplified UI ---
def load_css():
    """Inject the app's custom stylesheet into the page.

    The rules are emitted as one raw ``<style>`` element through
    ``st.markdown`` with ``unsafe_allow_html=True``. Returns nothing.
    """
    stylesheet = """
<style>
.main-header {
padding: 2rem;
text-align: center;
margin-bottom: 2rem;
}
.citation-output {
background: #f8f8f8;
border: 1px solid #e0e0e0;
border-radius: 4px;
padding: 1rem;
margin: 1rem 0;
font-family: 'Courier New', monospace;
}
.warning-box {
background: #f8f8f8;
border: 1px solid #e0e0e0;
border-radius: 4px;
padding: 1rem;
margin: 1rem 0;
}
.success-box {
background: #f8f8f8;
border: 1px solid #e0e0e0;
border-radius: 4px;
padding: 1rem;
margin: 1rem 0;
}
.info-card {
background: white;
border-radius: 4px;
padding: 1.5rem;
margin: 1rem 0;
border-left: 1px solid #e0e0e0;
}
.footer {
text-align: center;
padding: 2rem;
margin-top: 2rem;
border-top: 1px solid #e0e0e0;
font-size: 0.9rem;
}
.hash-display {
background: #f8f8f8;
border: 1px solid #e0e0e0;
border-radius: 4px;
padding: 1rem;
font-family: 'Courier New', monospace;
font-size: 0.85rem;
word-break: break-all;
margin: 0.5rem 0;
}
.tab-content {
padding: 2rem 0;
}
.datetime-display {
background: #f8f8f8;
border-radius: 4px;
padding: 0.8rem;
margin: 0.5rem 0;
border-left: 1px solid #e0e0e0;
}
.verification-table {
margin: 1rem 0;
border-radius: 4px;
overflow: hidden;
}
.rendered-citation {
margin: 1rem 0;
font-size: 1rem;
}
</style>
"""
    st.markdown(stylesheet, unsafe_allow_html=True)
# --- Helper Functions ---
def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
    """Return the SHA-256 hex digest that fingerprints one citation.

    The digest covers the UTF-8 bytes of a single string made of
    ``"<author>, <year>"`` followed by the other seven arguments in
    parameter order, all joined with ``" | "``.
    """
    head = f"{author}, {year}"
    tail = (url, fragment_text, cited_text, username, task_name, current_date, current_time)
    payload = " | ".join([head, *(f"{part}" for part in tail)])
    digest = hashlib.sha256()
    digest.update(payload.encode('utf-8'))
    return digest.hexdigest()
def format_citation_html(url, fragment_text, author, year, scc_hash):
    """Build the inline ``Author (Year)`` citation anchor.

    Args:
        url: Source URL; the text-fragment directive is appended to it.
        fragment_text: Quoted text to highlight; percent-encoded into the link.
        author: Author label shown in the link text.
        year: Publication year shown in parentheses.
        scc_hash: Hash stored in the anchor's ``data-hash`` attribute.

    Returns:
        An ``<a>`` element whose href is ``<url>#:~:text=<encoded fragment>``.
    """
    # quote() treats "-" as safe, but the text-directive grammar reserves it
    # for the "prefix-," / ",-suffix" markers, so it must be percent-encoded
    # too. unquote() turns %2D back into "-", so parsing is unaffected.
    encoded_fragment = urllib.parse.quote(fragment_text).replace('-', '%2D')
    full_url = f"{url}#:~:text={encoded_fragment}"
    return f'<a href="{full_url}" data-hash="{scc_hash}">{author} ({year})</a>'
def format_metadata_html(url, author, year, scc_hash, username, task_name, current_date, current_time):
    """Build the ledger anchor that carries the citation's provenance.

    Args:
        url: Source URL; the encoded metadata is appended as a text fragment.
        author: Author label shown in the link text.
        year: Publication year shown in parentheses.
        scc_hash: Hash shown in the link text and stored in ``data-hash``.
        username: Name of the person who generated the citation.
        task_name: Task or project the citation belongs to.
        current_date: Generation date string.
        current_time: Generation time string.

    Returns:
        An ``<a>`` element with text ``Author (Year). <hash>`` whose href
        fragment holds ``username—task_name—date—time`` percent-encoded.
    """
    # The four fields are separated by an em dash because that is the
    # delimiter parse_metadata_hash() splits on; without a separator the
    # fields cannot be recovered and verification can never succeed.
    metadata = f"{username}—{task_name}—{current_date}—{current_time}"
    encoded_metadata = urllib.parse.quote(metadata)
    full_url = f"{url}#:~:text={encoded_metadata}"
    return f'<a href="{full_url}" data-hash="{scc_hash}">{author} ({year}). {scc_hash}</a>'
def check_for_fragment(url):
    """Return True when *url* already carries a text-fragment directive.

    Besides the plain ``#:~:text=`` form, this recognises a directive that
    follows an ordinary fragment (``#section:~:text=...``) or another
    directive (``#:~:foo=1&text=...``). Appending a second ``#:~:text=`` to
    any of these would produce a broken link.

    Args:
        url: The URL string to inspect.

    Returns:
        bool: True if a ``text=`` directive is present after ``:~:`` in the
        fragment part of the URL, otherwise False.
    """
    # "#" starts the fragment, ":~:" starts the directive list, and "text="
    # must begin a directive: either first, or right after an "&".
    return re.search(r'#.*:~:(?:.*&)?text=', url) is not None
def parse_citation(citation_html):
    """Extract the citation fields from a generated citation anchor.

    Both link layouts produced by the generator are accepted: the
    start-of-text form ``Author (Year)`` and the end-of-text form
    ``(Author, Year)``. Leading and trailing whitespace is ignored.

    Args:
        citation_html: The ``<a ...>...</a>`` markup of a citation link.

    Returns:
        A tuple ``(author, year, url, fragment_text, scc_hash)`` with the
        fragment percent-decoded, or five ``None`` values when the input is
        empty or matches neither layout.
    """
    anchor_open = r'<a href="([^"]+)#:~:text=([^"]+)" data-hash="([^"]+)">'
    layouts = (
        anchor_open + r'([^<]+) \((\d{4})\)</a>',   # Author (Year)
        anchor_open + r'\(([^<]+), (\d{4})\)</a>',  # (Author, Year)
    )
    # Pasted markup often carries stray whitespace; re.match anchors at the
    # very first character, so trim before matching.
    candidate = citation_html.strip() if citation_html else ''
    for layout in layouts:
        match = re.match(layout, candidate)
        if match:
            url, encoded_fragment, scc_hash, author, year = match.groups()
            fragment_text = urllib.parse.unquote(encoded_fragment)
            return author, year, url, fragment_text, scc_hash
    return None, None, None, None, None
def parse_metadata_hash(metadata_html):
    """Extract the hash and provenance fields from a ledger anchor.

    The anchor's ``data-hash`` attribute supplies the hash. The text
    fragment in its href is percent-decoded and split on the em dash; it
    must yield exactly four pieces (username, task name, date, time).

    Returns:
        ``(scc_hash, username, task_name, date, time)``, or five ``None``
        values when the markup does not match or the piece count is not four.
    """
    nothing = (None, None, None, None, None)
    found = re.match(r'<a href="([^"]+)#:~:text=([^"]+)" data-hash="([^"]+)">[^<]+\(\d{4}\)\. ([^<]+)</a>', metadata_html)
    if found is None:
        return nothing
    # Group 2 is the encoded metadata, group 3 the data-hash value.
    pieces = urllib.parse.unquote(found.group(2)).split('—')
    if len(pieces) != 4:
        return nothing
    return (found.group(3), *pieces)
# --- JavaScript for extracting link attributes ---
def verification_js():
    """Return the ``<script>`` markup used by the verification tab.

    The script copies the ``outerHTML`` of the first ``<a>`` found inside
    ``#citation_input`` and ``#hash_input`` into the hidden inputs
    ``#citation_output`` and ``#hash_output``. It runs on every ``input``
    event and 100 ms after every ``paste`` event on either box.

    NOTE(review): the elements are looked up on the script's own
    ``document``. If this markup is rendered in a separate frame from the
    inputs, the lookups will find nothing — confirm where it is mounted.
    """
    script_markup = """
<script>
function updateOutputs() {
const citationInput = document.getElementById('citation_input');
const hashInput = document.getElementById('hash_input');
const citationOutput = document.getElementById('citation_output');
const hashOutput = document.getElementById('hash_output');
if (citationInput && hashInput && citationOutput && hashOutput) {
const citationLink = citationInput.getElementsByTagName('a')[0];
const hashLink = hashInput.getElementsByTagName('a')[0];
citationOutput.value = citationLink ? citationLink.outerHTML : '';
hashOutput.value = hashLink ? hashLink.outerHTML : '';
}
}
document.addEventListener('DOMContentLoaded', function() {
const citationInput = document.getElementById('citation_input');
const hashInput = document.getElementById('hash_input');
if (citationInput) {
citationInput.addEventListener('paste', function() {
setTimeout(updateOutputs, 100); // Delay to ensure paste is complete
});
citationInput.addEventListener('input', updateOutputs);
}
if (hashInput) {
hashInput.addEventListener('paste', function() {
setTimeout(updateOutputs, 100); // Delay to ensure paste is complete
});
hashInput.addEventListener('input', updateOutputs);
}
});
</script>
"""
    return script_markup
# --- Live Clock JavaScript ---
def live_clock():
    """Return HTML + JavaScript for a ticking Melbourne date/time display.

    The markup contains a ``#live_datetime`` span that the script fills
    with ``YYYY-MM-DD HH:MM:SS`` (Australia/Melbourne) immediately and then
    once per second.

    Returns:
        str: A self-contained HTML snippet (one ``<div>`` and one ``<script>``).
    """
    # Two robustness points in the script below:
    #  * parts are looked up by their `type` rather than by array position,
    #    so the output does not depend on how the locale orders the fields
    #    or how many literal separators it inserts;
    #  * `hourCycle: 'h23'` is used instead of `hour12: false`, because the
    #    latter can resolve to a 1-24 clock in some engines and show
    #    midnight as "24", which would disagree with the server's %H value.
    return """
<div class="datetime-display">
<span id="live_datetime"></span>
</div>
<script>
const formatter = new Intl.DateTimeFormat('en-AU', {
  timeZone: 'Australia/Melbourne',
  year: 'numeric',
  month: '2-digit',
  day: '2-digit',
  hour: '2-digit',
  minute: '2-digit',
  second: '2-digit',
  hourCycle: 'h23'
});
function updateClock() {
  const fields = {};
  for (const part of formatter.formatToParts(new Date())) {
    fields[part.type] = part.value;
  }
  const date = `${fields.year}-${fields.month}-${fields.day}`;
  const time = `${fields.hour}:${fields.minute}:${fields.second}`;
  const datetimeElement = document.getElementById('live_datetime');
  if (datetimeElement) {
    datetimeElement.innerText = `${date} ${time}`;
  }
}
updateClock();
setInterval(updateClock, 1000);
</script>
"""
# --- Streamlit App ---
# Configure the page, inject the stylesheet, then draw the title banner.
st.set_page_config(page_title="Smart Context Citation Tool", layout="wide")
load_css()
main_header_html = """
<div class="main-header">
<h1>Smart Context Citation (SCC) Tool</h1>
<p>Next-generation digital referencing system for the age of Generative AI</p>
</div>
"""
st.markdown(main_header_html, unsafe_allow_html=True)
# Expandable section for About and Example
# Collapsible panel: one card describing SCC, one card with a worked example.
about_html = """
<div class="info-card">
<h3>About SCC</h3>
The Smart Context Citation (SCC) style is a next-generation digital referencing system designed for the age of Generative AI. It embeds citation context directly in the document, uses cryptographic hash signatures for integrity, and eliminates traditional reference lists.
<strong>Purpose:</strong> Transparency, integrity, and digital fluency in citations.
<strong>Structure:</strong>
- Inline general author name and date style citation
- Hyperlinked URL with text fragment (#:~:text=)
- SHA-256 hash for verification
<strong>Benefits:</strong> Enhances fairness, integrates with source contexts, promotes digital fluency, prevents fabrication, and eliminates traditional reference lists.
<strong>Technical Legitimacy:</strong> Referencing the <a href="https://wicg.github.io/scroll-to-text-fragment/" target="_blank">Text Fragments WICG specification</a> for technical legitimacy.
</div>
<div class="info-card">
<h3>Example Citation</h3>
<strong>Input:</strong><br>
- Author: <code>Abuseif et al.</code><br>
- Year: <code>2025</code><br>
- URL: <code>https://www.sciencedirect.com/science/article/pii/S2772411523000046</code><br>
- Text: <code>A proposed design framework for green roof settings in general and trees on buildings</code><br>
<strong>Output (Start of Text):</strong><br>
<div class="rendered-citation">
Abuseif et al. (2025)
</div>
<strong>Output (End of Text):</strong><br>
<div class="rendered-citation">
(Abuseif et al., 2025)
</div>
</div>
"""
with st.expander("About SCC and Example Citation"):
    st.markdown(about_html, unsafe_allow_html=True)
tabs = st.tabs(["Citation Generator", "Verify Citation"])
# --- Tab 1: collect the citation details and render the generated links ---
with tabs[0]:
    # NOTE(review): this opening tag and its closing partner at the end of the
    # tab are emitted by separate st.markdown calls, so they most likely do not
    # wrap the widgets in between. Kept unchanged to avoid altering the layout.
    st.markdown('<div class="tab-content">', unsafe_allow_html=True)
    st.header("Generate New Citation")
    # User Information Section
    st.subheader("User Information")
    col1, col2 = st.columns(2)
    with col1:
        username = st.text_input("Username", help="Your username for tracking purposes", placeholder="e.g., john_doe")
    with col2:
        task_name = st.text_input("Task Name", help="The name of the task or project", placeholder="e.g., Literature Review Assignment")
    # Citation Info Section
    st.subheader("Citation Info")
    col3, col4 = st.columns(2)
    with col3:
        author_name = st.text_input("Author(s) Name", help="The author(s) of the source", placeholder="e.g., Smith or Smith et al.")
    with col4:
        publication_year = st.text_input("Publication Year", help="The year of publication", placeholder="e.g., 2023")
    col5, col6 = st.columns(2)
    with col5:
        source_url = st.text_input("Source URL", help="The full URL of the source", placeholder="https://example.com/article")
    with col6:
        annotated_text = st.text_input("Annotated Text", help="The text quoted or paraphrased from the source", placeholder="e.g., Thermal comfort thresholds...")
    # Live date and time display
    st.markdown("### Current Date and Time")
    components.html(live_clock(), height=50)
    # Get current date and time in Melbourne timezone for hash generation
    melbourne_tz = pytz.timezone(MELBOURNE_TIMEZONE)
    current_datetime_melbourne = datetime.now(melbourne_tz)
    current_date = current_datetime_melbourne.strftime("%Y-%m-%d")
    current_time = current_datetime_melbourne.strftime("%H:%M:%S")
    generate_button = st.button("Generate Citation", type="primary", use_container_width=True)
    if generate_button:
        if not all([username, task_name, author_name, publication_year, source_url, annotated_text]):
            st.error("Please fill in all fields before generating a citation.")
        elif check_for_fragment(source_url):
            st.markdown("""
<div class="warning-box">
<strong>Warning:</strong> It seems like your URL already contains a text fragment (<code>#:~:text=</code>).
This suggests you may have used AI assistance in generating this link. Please go back to the original source,
read the context carefully, and copy the source link again without any existing fragment.
</div>
""", unsafe_allow_html=True)
        else:
            # The annotated text is passed twice: it fills both the
            # fragment_text and cited_text slots of the hash input.
            scc_hash = generate_citation_hash(author_name, publication_year, source_url, annotated_text, annotated_text, username, task_name, current_date, current_time)
            citation_link_start = format_citation_html(source_url, annotated_text, author_name, publication_year, scc_hash)
            # quote() leaves "-" as-is, but the text-fragment syntax reserves
            # it, so it is percent-encoded explicitly; unquote() restores it.
            encoded_annotation = urllib.parse.quote(annotated_text).replace('-', '%2D')
            citation_link_end = f'<a href="{source_url}#:~:text={encoded_annotation}" data-hash="{scc_hash}">({author_name}, {publication_year})</a>'
            metadata_link = format_metadata_html(source_url, author_name, publication_year, scc_hash, username, task_name, current_date, current_time)
            st.markdown("## Generated Citations")
            col_html1, col_html2 = st.columns(2)
            # Each st.markdown call is rendered as its own element, so a
            # wrapper <div> only encloses the link (and picks up its CSS
            # class) when both are sent in the same call.
            # HTML Citation - Start of Text
            with col_html1:
                st.markdown("### Citation (Start of Text)")
                st.markdown(f'<div class="rendered-citation">{citation_link_start}</div>', unsafe_allow_html=True)
            # HTML Citation - End of Text
            with col_html2:
                st.markdown("### Citation (End of Text)")
                st.markdown(f'<div class="rendered-citation">{citation_link_end}</div>', unsafe_allow_html=True)
            # SCC Ledger
            st.markdown("### SCC Ledger")
            st.markdown(f'<div class="hash-display">{metadata_link}</div>', unsafe_allow_html=True)
    st.markdown('</div>', unsafe_allow_html=True)
# --- Tab 2: re-derive the hash from pasted links and compare ---
with tabs[1]:
    st.markdown('<div class="tab-content">', unsafe_allow_html=True)
    st.header("Verify Citation")
    st.markdown("""
<div class="info-card">
Paste the generated citation and hash with their embedded links below to verify the citation's authenticity. Copy the rendered links directly from the output.
</div>
""", unsafe_allow_html=True)
    # Make sure both session-state slots exist before anything reads them.
    for state_key in ('citation_html', 'hash_html'):
        if state_key not in st.session_state:
            st.session_state[state_key] = ''
    # Editable paste boxes plus the hidden inputs the script writes into.
    paste_boxes_html = """
<div>
<label>Paste Citation (with embedded link)</label>
<div id="citation_input" contenteditable="true" style="border: 1px solid #e0e0e0; border-radius: 4px; padding: 0.5rem; min-height: 50px; margin-bottom: 1rem;"></div>
<input type="hidden" id="citation_output" name="citation_output">
<label>Paste Hash (with embedded link)</label>
<div id="hash_input" contenteditable="true" style="border: 1px solid #e0e0e0; border-radius: 4px; padding: 0.5rem; min-height: 50px; margin-bottom: 1rem;"></div>
<input type="hidden" id="hash_output" name="hash_output">
</div>
"""
    st.markdown(paste_boxes_html, unsafe_allow_html=True)
    # Script that copies the pasted anchors into the hidden inputs.
    components.html(verification_js(), height=0)
    # NOTE(review): nothing in the visible code stores 'citation_output' or
    # 'hash_output' in st.session_state, so both lookups appear to always
    # fall back to '' — confirm how the pasted values are meant to arrive.
    citation_html = st.session_state.get('citation_output', '')
    hash_html = st.session_state.get('hash_output', '')
    verify_button = st.button("Verify Citation", type="primary", use_container_width=True)
    if verify_button:
        if not citation_html or not hash_html:
            st.error("Please paste both the citation and hash links before verifying.")
        else:
            # The citation yields author/year/url/fragment; the ledger link
            # yields the stored hash plus who/what/when.
            author, year, url, fragment_text, citation_hash = parse_citation(citation_html)
            scc_hash, username, task_name, date, time = parse_metadata_hash(hash_html)
            required_fields = [author, year, url, fragment_text, scc_hash, username, task_name, date, time]
            if not all(required_fields):
                st.error("Invalid citation or hash format. Please ensure both inputs are correctly pasted links from the generated output.")
            else:
                # The fragment fills both text slots of the hash input.
                recomputed_hash = generate_citation_hash(
                    author, year, url, fragment_text, fragment_text, username, task_name, date, time
                )
                verified_html = """
<div class="success-box">
<strong>Hash verified successfully!</strong> The citation is authentic and hasn't been tampered with.
</div>
"""
                failed_html = """
<div class="warning-box">
<strong>Hash verification failed!</strong> The citation may have been altered or is not authentic.
</div>
"""
                verdict_html = verified_html if recomputed_hash == scc_hash else failed_html
                st.markdown(verdict_html, unsafe_allow_html=True)
    st.markdown('</div>', unsafe_allow_html=True)
# Footer
footer_html = """
<div class="footer">
Developed by: Dr Majed Abuseif<br>
School of Architecture and Built Environment<br>
Deakin University<br>
© 2025
</div>
"""
st.markdown(footer_html, unsafe_allow_html=True)