File size: 7,314 Bytes
253246d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import streamlit as st
import pandas as pd
import os
import tempfile
from backend import SemanticAnalyzer

# Must be the first Streamlit call on the page.
st.set_page_config(page_title="Semantic Document Analyzer", layout="wide")

# Global CSS theme: gradient page background, gradient buttons with a hover
# lift, bold metric values, gradient-filled <h1> text, and a card look for
# the sidebar container. NOTE(review): `.css-1d391kg` is an auto-generated
# Streamlit class name and may break across Streamlit versions — confirm.
_CUSTOM_CSS = """
    <style>
    /* Premium Look & Feel */
    .stApp {
        background: linear-gradient(to right, #f8f9fa, #e9ecef);
        font-family: 'Inter', sans-serif;
    }
    .stButton>button {
        background: linear-gradient(45deg, #4f46e5, #7c3aed);
        color: white;
        border: none;
        border-radius: 8px;
        padding: 0.75rem 1.5rem;
        font-weight: 600;
        transition: all 0.3s ease;
    }
    .stButton>button:hover {
        transform: translateY(-2px);
        box-shadow: 0 4px 12px rgba(79, 70, 229, 0.3);
    }
    div[data-testid="stMetricValue"] {
        color: #111827;
        font-weight: 700;
    }
    h1 {
        background: -webkit-linear-gradient(45deg, #1e3a8a, #3b82f6);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        font-weight: 800 !important;
    }
    .css-1d391kg {
        background-color: #ffffff;
        border-radius: 12px;
        padding: 1.5rem;
        box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
    }
    </style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

st.title("🧠 Semantic Document Analyzer")

# Intro card (raw HTML) describing what the analyzer does.
_INTRO_HTML = """
<div style='background-color: white; padding: 1.5rem; border-radius: 10px; box-shadow: 0 2px 5px rgba(0,0,0,0.05); margin-bottom: 2rem;'>
    <h4 style='margin-top:0'>Holistic Document Understanding</h4>
    <p style='color: #4b5563;'>
    This AI system leverages <b>Sentence-BERT</b> and <b>Cross-Encoders</b> to perform deep semantic analysis across long documents.
    It goes beyond simple keyword matching to understand context, detecting subtle contradictions and semantic duplicates.
    </p>
</div>
"""
st.markdown(_INTRO_HTML, unsafe_allow_html=True)

# Sidebar controls: PDF upload widget plus the trigger button.
# `uploaded_files` and `analyze_btn` are read by the main flow below.
st.sidebar.header("Upload Documents")
uploaded_files = st.sidebar.file_uploader(
    "Upload PDF files", type=["pdf"], accept_multiple_files=True
)
analyze_btn = st.sidebar.button("Analyze Documents", type="primary")

def _save_uploads(files, dest_dir):
    """Write each uploaded file into *dest_dir*; return the saved paths."""
    paths = []
    for uploaded in files:
        path = os.path.join(dest_dir, uploaded.name)
        with open(path, "wb") as fh:
            fh.write(uploaded.getbuffer())
        paths.append(path)
    return paths


def _build_report(results):
    """Format the analysis *results* dict as a Markdown report string.

    Expects the keys the backend returns: ``stats`` (with ``total_docs`` /
    ``total_chunks``), ``duplicates`` and ``contradictions``.
    """
    # Collect fragments and join once instead of repeated `+=` concatenation.
    parts = ["# Semantic Analysis Report\n\n"]
    parts.append(f"Total Documents: {results['stats']['total_docs']}\n")
    parts.append(f"Total Chunks: {results['stats']['total_chunks']}\n\n")

    parts.append("## Duplicates\n")
    if results['duplicates']:
        for d in results['duplicates']:
            parts.append(f"- Score: {d['score']:.4f}\n")
            # Duplicate chunks can be long; truncate to the first 100 chars.
            parts.append(f"  - Source A: {d['chunk_a']['source']} | \"{d['chunk_a']['text'][:100]}...\"\n")
            parts.append(f"  - Source B: {d['chunk_b']['source']} | \"{d['chunk_b']['text'][:100]}...\"\n\n")
    else:
        parts.append("No duplicates found.\n\n")

    parts.append("## Contradictions\n")
    if results['contradictions']:
        for c in results['contradictions']:
            parts.append(f"- Confidence: {c['confidence']:.4f}\n")
            parts.append(f"  - Source A: {c['chunk_a']['source']} | \"{c['chunk_a']['text']}\"\n")
            parts.append(f"  - Source B: {c['chunk_b']['source']} | \"{c['chunk_b']['text']}\"\n\n")
    else:
        parts.append("No contradictions found.\n")
    return "".join(parts)


def _render_pair(pair, box):
    """Show a chunk pair side by side using the given message box (st.info/st.warning)."""
    c1, c2 = st.columns(2)
    with c1:
        st.caption(f"Source: {pair['chunk_a']['source']}")
        box(pair['chunk_a']['text'])
    with c2:
        st.caption(f"Source: {pair['chunk_b']['source']}")
        box(pair['chunk_b']['text'])


def _render_results(results):
    """Draw the dashboard: metrics, duplicates, contradictions, report download."""
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Documents", results['stats']['total_docs'])
    with col2:
        st.metric("Total Text Chunks", results['stats']['total_chunks'])

    st.divider()

    # 1. Duplicates
    st.subheader(f"⚠️ Potential Duplicates Detected ({len(results['duplicates'])})")
    if results['duplicates']:
        for dup in results['duplicates']:
            with st.expander(f"Similarity Score: {dup['score']:.4f}"):
                _render_pair(dup, st.info)
    else:
        st.success("No duplicates found.")

    st.divider()

    # 2. Contradictions
    st.subheader(f"🛑 Contradictions / Inconsistencies ({len(results['contradictions'])})")
    if results['contradictions']:
        for contra in results['contradictions']:
            with st.expander(f"Contradiction Confidence: {contra['confidence']:.4f}"):
                _render_pair(contra, st.warning)
    else:
        # Consistency fix: mirror the duplicates section's empty-state message
        # (the original rendered nothing here).
        st.success("No contradictions found.")

    st.download_button(
        label="Download Report (Markdown)",
        data=_build_report(results),
        file_name="analysis_report.md",
        mime="text/markdown",
    )


if analyze_btn and uploaded_files:
    with st.spinner("Processing documents... This may take a while for large files."):
        # TemporaryDirectory guarantees the saved PDFs are removed even when
        # analysis raises (the original used mkdtemp() and never cleaned up —
        # the cleanup code was commented out).
        with tempfile.TemporaryDirectory() as temp_dir:
            file_paths = _save_uploads(uploaded_files, temp_dir)
            try:
                analyzer = SemanticAnalyzer()
                results = analyzer.analyze_documents(file_paths)
            except Exception as e:
                st.error(f"An error occurred during analysis: {str(e)}")
                import traceback
                st.write(traceback.format_exc())
            else:
                # Backend signals failures via an "error" key in the result.
                if "error" in results:
                    st.error(results["error"])
                else:
                    _render_results(results)
elif analyze_btn:
    # Bug fix: the original checked `len(uploaded_files) == 0` *inside* the
    # `analyze_btn and uploaded_files` branch, so this error was unreachable.
    st.error("Please upload at least one document.")
else:
    st.info("Upload documents and click Analyze to start.")