import subprocess
import sys as sys_mod

# Make sure the spaCy English model can be loaded before the app starts;
# if loading fails with OSError, install the model with the running interpreter.
_SPACY_MODEL = "en_core_web_sm"
try:
    import spacy

    spacy.load(_SPACY_MODEL)
except OSError:
    # OSError from the load attempt is treated as "model not installed".
    print("Downloading spacy model...")
    _spacy_download_cmd = [sys_mod.executable, "-m", "spacy", "download", _SPACY_MODEL]
    # check=True makes a failed download abort start-up instead of continuing silently.
    subprocess.run(_spacy_download_cmd, check=True)

import streamlit as st
import sys
import json
from pathlib import Path

# Key directories, all derived from this script's absolute, resolved location.
THIS_FILE = Path(__file__).resolve()
PROJECT_ROOT = THIS_FILE.parent
SRC_DIR = PROJECT_ROOT / "src"

# Locate the backend directory. Candidates are tried in order: a "backend"
# folder next to this file, then one directory above it. If neither exists,
# this file's own directory is used.
# (Per the original author's note, this is meant to cover both the Docker
# layout, /app/backend, and a local checkout with PROJECT_ROOT/backend.)
_BACKEND_CANDIDATES = (
    PROJECT_ROOT / "backend",
    PROJECT_ROOT.parent / "backend",
)
BACKEND_DIR = next(
    (candidate for candidate in _BACKEND_CANDIDATES if candidate.exists()),
    PROJECT_ROOT,
)

PIPELINE_DIR = BACKEND_DIR / "pipeline"

# Directories to place at the front of sys.path: the project root, the backend
# directory and, when it exists, the pipeline directory.
new_path = [str(directory) for directory in (PROJECT_ROOT, BACKEND_DIR)]
if PIPELINE_DIR.exists():
    new_path += [str(PIPELINE_DIR)]

# Each entry is inserted at index 0 in turn, so the LAST entry listed above
# ends up first on sys.path (highest import priority).
for p in new_path:
    # Drop one existing occurrence first so the entry is moved to the front
    # rather than simply added again.
    try:
        sys.path.remove(p)
    except ValueError:
        pass
    sys.path.insert(0, p)

import importlib
# Remove any already-imported backend modules from the import cache.
# NOTE(review): presumably so stale copies are not reused after the sys.path
# changes above or across script reruns — confirm the intent with the author.
for mod_name in (
    'backend',
    'backend.pipeline',
    'backend.pipeline.dictionaries',
    'backend.pipeline.parser',
    'backend.pipeline.scorer',
):
    # pop() with a default is a no-op when the module was never imported.
    sys.modules.pop(mod_name, None)

# Set the browser-tab title and icon and switch the page to the wide layout.
# NOTE(review): Streamlit has historically required set_page_config to be the
# first Streamlit command executed — keep this ahead of any other st.* call
# (confirm against the Streamlit version in use).
st.set_page_config(
    page_title="Syntactic Morality Analyzer",
    page_icon="X",
    layout="wide"
)

def import_pipeline_module(module_filename, module_name):
    """Load a source file from ``PIPELINE_DIR`` as a module with a chosen name.

    The file is executed afresh on every call; nothing is reused from a
    previous load.

    Args:
        module_filename: File name inside ``PIPELINE_DIR`` (e.g. ``"scorer.py"``).
        module_name: Name given to the loaded module. The module is also
            registered in ``sys.modules`` under this name.

    Returns:
        The newly executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for the file
            (for example, the file name has an unrecognised extension).
        Exception: Anything raised by the module's own top-level code. The
            half-initialised module is removed from ``sys.modules`` first.
    """
    import importlib.util

    module_path = PIPELINE_DIR / module_filename
    spec = importlib.util.spec_from_file_location(module_name, str(module_path))
    # spec_from_file_location returns None when it cannot pick a loader;
    # fail with a clear message instead of an AttributeError further down.
    if spec is None or spec.loader is None:
        raise ImportError(
            f"Cannot load pipeline module {module_name!r} from {module_path}"
        )

    mod = importlib.util.module_from_spec(spec)
    # Register before executing, as the importlib "import a source file"
    # recipe does, so code that looks itself up via sys.modules[__name__]
    # while the module body runs can find it.
    sys.modules[module_name] = mod
    try:
        spec.loader.exec_module(mod)
    except BaseException:
        # Do not leave a partially initialised module in the import cache.
        sys.modules.pop(module_name, None)
        raise
    return mod

def init_components():
    """Load the pipeline modules and build the loader, parser and scorer.

    Returns:
        A ``(dict_loader, parser, scorer)`` tuple. ``load_all()`` has already
        been called on the dictionary loader, and the scorer is constructed
        from the loader and the parser.

    NOTE(review): ``main()`` calls this on every script run; if start-up is
    slow, caching the result may be worth considering.
    """
    # (file name in PIPELINE_DIR, module name) — loaded in exactly this order.
    module_specs = (
        ("dictionaries.py", "pipeline_dictionaries"),
        ("syntactic_parser.py", "pipeline_parser"),
        ("scorer.py", "pipeline_scorer"),
    )
    dicts_mod, parser_mod, scorer_mod = (
        import_pipeline_module(filename, alias) for filename, alias in module_specs
    )

    # Look up all three classes before constructing anything, so a missing
    # attribute fails before any component is created.
    loader_cls = dicts_mod.DictionaryLoader
    parser_cls = parser_mod.SyntacticParser
    scorer_cls = scorer_mod.MoralScorer

    data_dir = BACKEND_DIR / "data"
    dict_loader = loader_cls(str(data_dir))
    dict_loader.load_all()

    parser = parser_cls()
    scorer = scorer_cls(dict_loader, parser)
    return dict_loader, parser, scorer

def load_results():
    """Read saved results from ``BACKEND_DIR/models/multi_dict_results.json``.

    Returns:
        The decoded JSON content, or ``None`` when the results file (or its
        parent directory) does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    results_file = BACKEND_DIR / "models" / "multi_dict_results.json"
    try:
        # JSON is UTF-8; read it explicitly as such rather than relying on the
        # platform's default text encoding.
        raw = results_file.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # A missing file is the normal "no results yet" case, not an error.
        return None
    return json.loads(raw)

def _progress_fraction(score):
    """Clamp ``score`` into the ``[0.0, 1.0]`` range that ``st.progress`` accepts.

    Streamlit rejects float progress values outside that range, so negative
    scores and NaN map to 0.0 and anything above one maps to 1.0. Callers
    still show the unclamped score in the bar's label.
    """
    fraction = float(score)
    # ``not (x > 0)`` is also true for NaN, which min()/max() would let through.
    if not fraction > 0.0:
        return 0.0
    return min(fraction, 1.0)


def main():
    """Render the page: sidebar settings, text input and analysis results.

    Builds the pipeline components, shows the dictionary selector and any
    saved results in the sidebar, and, once a button is clicked with
    non-empty text, displays baseline and/or syntax-enhanced scores followed
    by a syntactic breakdown of the input.
    """
    dict_loader, parser, scorer = init_components()

    st.title("Syntactic Morality Analyzer")
    st.markdown("**Extension to eMACDscore** (Malik et al., 2025)")
    st.markdown("Adds syntactic weighting to detect negation and grammatical roles.")

    st.sidebar.header("Settings")
    # Maps the dictionary identifier (passed to the scorer and the loader) to
    # the label shown in the selector.
    # NOTE(review): the original comment says missing dictionary files get
    # auto-created placeholders — that cannot be verified from this file.
    dict_options = {
        "mfd": "MFD",
        "mfd2": "MFD 2.0",
        "emfd": "eMFD",
        "emacd": "eMACD",
        "macd": "MACD",
    }
    selected_dict = st.sidebar.selectbox(
        "Dictionary",
        list(dict_options),
        format_func=lambda x: dict_options[x],
    )

    results = load_results()
    if results:
        st.sidebar.markdown("### Training Results (Macro F1)")
        for dict_name, metrics in results.items():
            # Missing sections or metrics count as 0 so the sidebar still renders.
            baseline_f1 = round(metrics.get("baseline", {}).get("macro", 0), 3)
            syntax_f1 = round(metrics.get("syntax", {}).get("macro", 0), 3)
            diff = round(syntax_f1 - baseline_f1, 3)
            st.sidebar.markdown(
                f"**{dict_name}**: {baseline_f1} -> {syntax_f1} ({diff:+})"
            )

    st.header("Input Text")
    text_input = st.text_area(
        "Enter text to analyze:",
        height=80,
        placeholder="e.g., I'm not caring about fairness",
    )

    col1, col2 = st.columns(2)
    with col1:
        analyze_synx = st.button("Analyze with Syntax", type="primary", use_container_width=True)
    with col2:
        analyze_baseline = st.button("Analyze Baseline", use_container_width=True)

    if not (analyze_synx or analyze_baseline):
        return
    if not (text_input or "").strip():
        # Give feedback instead of silently ignoring the click.
        st.warning("Please enter some text to analyze.")
        return

    st.divider()
    st.header("Results")

    baseline_scores = scorer.score_baseline(text_input, selected_dict)
    syntax_scores = scorer.score(text_input, selected_dict)

    domains = dict_loader.get_domains(selected_dict)
    text_lower = text_input.lower()

    if analyze_baseline:
        st.subheader("Baseline (Keyword Only)")
        for domain in domains:
            score = baseline_scores.get(domain, 0)
            if score <= 0:
                continue
            domain_words = dict_loader.get_words(selected_dict, domain)
            if isinstance(domain_words, dict):
                domain_words = list(domain_words.keys())
            # Display-only: list dictionary words that occur as substrings of
            # the lower-cased input.
            matched = [w for w in domain_words if w.lower() in text_lower]
            if matched:
                st.markdown(f"**{domain}**: {', '.join(matched)}")
                st.progress(_progress_fraction(score), text=f"Score: {score:.3f}")

    if analyze_synx:
        st.subheader("Syntax-Enhanced Results")
        for domain, score in syntax_scores.items():
            delta = score - baseline_scores.get(domain, 0)
            # The bar is clamped to a valid range; the label keeps the real score.
            st.progress(
                _progress_fraction(score),
                text=f"{domain}: {score:.3f} ({delta:+.3f})",
            )

    st.subheader("Syntactic Breakdown")
    syntactic = parser.parse(text_input)
    col1, col2, col3 = st.columns(3)
    with col1:
        # .get keeps this consistent with the subject/object lookups below.
        st.write("**Tokens:**", syntactic.get("tokens", []))
    with col2:
        st.write("**Subjects:**", [s["text"] for s in syntactic.get("subjects", [])])
    with col3:
        st.write("**Objects:**", [o["text"] for o in syntactic.get("objects", [])])

    if syntactic.get("negation_scopes"):
        st.warning("Negation detected! Keywords in negation scope have reduced scores.")

# Entry point: render the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()