# semantic_main / app.py
# Provenance: Hugging Face Space upload by JAYASREESS ("Upload 8 files", commit 253246d, verified).
import os
import shutil
import tempfile
import traceback

import pandas as pd
import streamlit as st

from backend import SemanticAnalyzer
st.set_page_config(page_title="Semantic Document Analyzer", layout="wide")
st.markdown("""
<style>
/* Premium Look & Feel */
.stApp {
background: linear-gradient(to right, #f8f9fa, #e9ecef);
font-family: 'Inter', sans-serif;
}
.stButton>button {
background: linear-gradient(45deg, #4f46e5, #7c3aed);
color: white;
border: none;
border-radius: 8px;
padding: 0.75rem 1.5rem;
font-weight: 600;
transition: all 0.3s ease;
}
.stButton>button:hover {
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(79, 70, 229, 0.3);
}
div[data-testid="stMetricValue"] {
color: #111827;
font-weight: 700;
}
h1 {
background: -webkit-linear-gradient(45deg, #1e3a8a, #3b82f6);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-weight: 800 !important;
}
.css-1d391kg {
background-color: #ffffff;
border-radius: 12px;
padding: 1.5rem;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
}
</style>
""", unsafe_allow_html=True)
st.title("🧠 Semantic Document Analyzer")
st.markdown("""
<div style='background-color: white; padding: 1.5rem; border-radius: 10px; box-shadow: 0 2px 5px rgba(0,0,0,0.05); margin-bottom: 2rem;'>
<h4 style='margin-top:0'>Holistic Document Understanding</h4>
<p style='color: #4b5563;'>
This AI system leverages <b>Sentence-BERT</b> and <b>Cross-Encoders</b> to perform deep semantic analysis across long documents.
It goes beyond simple keyword matching to understand context, detecting subtle contradictions and semantic duplicates.
</p>
</div>
""", unsafe_allow_html=True)
# Sidebar
with st.sidebar:
st.header("Upload Documents")
uploaded_files = st.file_uploader("Upload PDF files", type=['pdf'], accept_multiple_files=True)
analyze_btn = st.button("Analyze Documents", type="primary")
if analyze_btn and uploaded_files:
if len(uploaded_files) == 0:
st.error("Please upload at least one document.")
else:
with st.spinner("Processing documents... This may take a while for large files."):
# Save uploaded files temporarily
temp_dir = tempfile.mkdtemp()
file_paths = []
for uploaded_file in uploaded_files:
path = os.path.join(temp_dir, uploaded_file.name)
with open(path, "wb") as f:
f.write(uploaded_file.getbuffer())
file_paths.append(path)
# Initialize Analyzer
try:
analyzer = SemanticAnalyzer()
results = analyzer.analyze_documents(file_paths)
# Cleanup
# for path in file_paths: os.remove(path)
# os.rmdir(temp_dir)
if "error" in results:
st.error(results["error"])
else:
# Dashboard Layout
col1, col2 = st.columns(2)
with col1:
st.metric("Total Documents", results['stats']['total_docs'])
with col2:
st.metric("Total Text Chunks", results['stats']['total_chunks'])
st.divider()
# 1. Duplicates
st.subheader(f"⚠️ Potential Duplicates Detected ({len(results['duplicates'])})")
if results['duplicates']:
for dup in results['duplicates']:
with st.expander(f"Similarity Score: {dup['score']:.4f}"):
c1, c2 = st.columns(2)
with c1:
st.caption(f"Source: {dup['chunk_a']['source']}")
st.info(dup['chunk_a']['text'])
with c2:
st.caption(f"Source: {dup['chunk_b']['source']}")
st.info(dup['chunk_b']['text'])
else:
st.success("No duplicates found.")
st.divider()
# 2. Contradictions
st.subheader(f"🛑 Contradictions / Inconsistencies ({len(results['contradictions'])})")
if results['contradictions']:
for contra in results['contradictions']:
with st.expander(f"Contradiction Confidence: {contra['confidence']:.4f}"):
c1, c2 = st.columns(2)
with c1:
st.caption(f"Source: {contra['chunk_a']['source']}")
st.warning(contra['chunk_a']['text'])
with c2:
st.caption(f"Source: {contra['chunk_b']['source']}")
st.warning(contra['chunk_b']['text'])
# Export Report
report_text = f"# Semantic Analysis Report\n\n"
report_text += f"Total Documents: {results['stats']['total_docs']}\n"
report_text += f"Total Chunks: {results['stats']['total_chunks']}\n\n"
report_text += "## Duplicates\n"
if results['duplicates']:
for d in results['duplicates']:
report_text += f"- Score: {d['score']:.4f}\n"
report_text += f" - Source A: {d['chunk_a']['source']} | \"{d['chunk_a']['text'][:100]}...\"\n"
report_text += f" - Source B: {d['chunk_b']['source']} | \"{d['chunk_b']['text'][:100]}...\"\n\n"
else:
report_text += "No duplicates found.\n\n"
report_text += "## Contradictions\n"
if results['contradictions']:
for c in results['contradictions']:
report_text += f"- Confidence: {c['confidence']:.4f}\n"
report_text += f" - Source A: {c['chunk_a']['source']} | \"{c['chunk_a']['text']}\"\n"
report_text += f" - Source B: {c['chunk_b']['source']} | \"{c['chunk_b']['text']}\"\n\n"
else:
report_text += "No contradictions found.\n"
st.download_button(
label="Download Report (Markdown)",
data=report_text,
file_name="analysis_report.md",
mime="text/markdown"
)
except Exception as e:
st.error(f"An error occurred during analysis: {str(e)}")
import traceback
st.write(traceback.format_exc())
else:
st.info("Upload documents and click Analyze to start.")