Spaces:

AniseF
/

pln-ddgp-plus

Runtime error

File size: 11,939 Bytes

8b678a0
 
 
 
 
 
 
 
ed22bec
b027b7e
ed22bec
07c03b6
dca43a5
3416470
dca43a5
 
 
 
 
 
55930c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dca43a5
 
8b678a0
b261a6d
8b678a0
 
 
 
 
 
 
4760717
8b678a0
 
 
 
 
 
 
b027b7e
f41f5ce
8b678a0
b261a6d
 
 
 
 
2d409dd
b261a6d
 
f41f5ce
 
 
 
 
 
 
 
 
 
 
 
8949453
 
 
 
 
10d0c1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f41f5ce
cb07453
f41f5ce
b027b7e
 
 
 
c772e57
f41f5ce
896cd13
cb07453
896cd13
 
 
 
 
cb07453
896cd13
b261a6d
 
 
 
 
8949453
 
 
b261a6d
 
 
 
 
f41f5ce
 
 
 
 
b261a6d
f41f5ce
75f6904
 
 
 
 
 
b261a6d
75f6904
 
 
 
 
b261a6d
f41f5ce
 
 
e537a9b
f41f5ce
 
 
b027b7e
f41f5ce
e537a9b
 
 
 
f41f5ce
b261a6d
e537a9b
 
 
 
b027b7e
 
f41f5ce
 
b027b7e
 
f41f5ce
 
b261a6d
 
 
e537a9b
8949453
b261a6d
 
 
 
 
f41f5ce
8949453
 
37a2e48
e537a9b
 
8949453
e537a9b
8949453
e537a9b
b027b7e
37a2e48
 
 
 
8949453
37a2e48
b261a6d
e537a9b
 
 
 
 
 
 
 
cb07453
8949453
 
b261a6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e22ec4
b261a6d
dca43a5
2446ebe
b261a6d
bea2adc
de7baaa
8949453
bea2adc
de7baaa
 
dca43a5
bea2adc
 
7af212a
bea2adc
8949453
d80f633
bea2adc
 
 
 
 
 
 
 
 
 
 
8949453
bea2adc
 
 
 
 
8949453
 
bea2adc
 
 
8949453
 
9167704
 
 
235545f
 
8949453
 
bea2adc
8949453
 
bea2adc
8949453
 
0e22ec4
fa710f7
 
3dccda7
 
fa710f7
3dccda7
 
fa710f7
3dccda7
 
 
 
 
 
 
 
2446ebe
 
 
 
 
 
 
 
fa76c99
55930c6
2446ebe

import os
import subprocess
import sys
import urllib.request
import json
import pandas as pd
import spacy
import gradio as gr
import re
import unicodedata
from translit import latin_to_basic_grc

# Alpheios integration, part 1: markup that loads the Alpheios components stylesheet.
# NOTE: despite its name this is an HTML <link> element, not raw CSS text.
alpheios_css = """
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/alpheios-components@latest/dist/style/style-components.min.css"/>
"""

# Alpheios integration, part 2: source of a JavaScript function that dynamically
# imports the Alpheios embedded library from the CDN and activates it, either
# immediately (document already fully loaded) or on the window "load" event.
alpheios_js = """
function() {
    const loadAlpheios = () => {
        import("https://cdn.jsdelivr.net/npm/alpheios-embedded@latest/dist/alpheios-embedded.min.js")
        .then(embedLib => {
            const alpheios = embedLib.AlpheiosEmbedded.ImportLib();
            alpheios.activate({
                clientId: 'ddgp-plus-space',
                authStatus: 'notLoggedIn'
            });
            console.log("Alpheios Ativado com Sucesso!");
        }).catch(e => console.error("Erro Alpheios:", e));
    };

    if (document.readyState === 'complete') {
        loadAlpheios();
    } else {
        window.addEventListener('load', loadAlpheios);
    }
}
"""

# --- 1. INSTALAÇÃO E CARREGAMENTO ---
def install_odycy():
    """Make sure the ``grc_odycy_joint_sm`` spaCy model package is installed.

    When the package cannot be imported, its wheel is downloaded from the
    Hugging Face Hub into the current working directory, installed with
    ``pip install --no-deps`` for the running interpreter, and the downloaded
    file is deleted again -- also when the download or the install fails.

    Raises:
        urllib.error.URLError: if the wheel cannot be downloaded.
        subprocess.CalledProcessError: if ``pip install`` exits with an error.
    """
    try:
        import grc_odycy_joint_sm  # noqa: F401 -- availability probe only
        return
    except ImportError:
        pass

    import importlib

    url = "https://huggingface.co/chcaa/grc_odycy_joint_sm/resolve/main/grc_odycy_joint_sm-any-py3-none-any.whl"
    # The file is saved under a different, versioned name before installing.
    # NOTE(review): presumably because the upstream name ("...-any-py3-...")
    # is not a valid wheel file name for pip -- confirm.
    valid_wheel_name = "grc_odycy_joint_sm-1.0.0-py3-none-any.whl"
    try:
        urllib.request.urlretrieve(url, valid_wheel_name)
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", valid_wheel_name, "--no-deps"]
        )
    finally:
        # Never leave a (possibly partial) wheel behind, whatever happened above.
        if os.path.exists(valid_wheel_name):
            os.remove(valid_wheel_name)

    # The package was installed after the interpreter started: refresh the
    # import system's finder caches so a following ``import`` can see it.
    importlib.invalidate_caches()

install_odycy()

# Load the OdyCy pipeline. If that fails for any ordinary reason, fall back to
# a blank Ancient Greek pipeline so the app can still start, and say why on
# stderr instead of hiding the problem. ``except Exception`` (rather than a
# bare ``except``) lets KeyboardInterrupt / SystemExit propagate normally.
try:
    import grc_odycy_joint_sm
    nlp = grc_odycy_joint_sm.load()
except Exception as exc:
    print(
        f"[DDGP] grc_odycy_joint_sm could not be loaded ({exc!r}); "
        "falling back to spacy.blank('grc').",
        file=sys.stderr,
    )
    nlp = spacy.blank("grc")

# Part-of-speech tag -> Portuguese label shown in the analysis table.
# Tags missing from this map are displayed unchanged.
TAG_MAP = {
    'NOUN': 'Substantivo', 'VERB': 'Verbo', 'ADJ': 'Adjetivo',
    'DET': 'Artigo/Det.', 'PRON': 'Pronome', 'ADV': 'Advérbio',
    'ADP': 'Preposição', 'CCONJ': 'Conjunção', 'SCONJ': 'Conjunção Sub.',
    'PART': 'Partícula', 'PROPN': 'Nome Próprio', 'PUNCT': 'Pontuação',
    'AUX': 'Auxiliar/Cópula', 'NUM': 'Numeral'
}

def load_json(path):
    """Parse the UTF-8 JSON file at ``path`` and return its content.

    Returns an empty dict when nothing exists at ``path``.
    """
    if not os.path.exists(path):
        return {}
    with open(path, mode='r', encoding='utf-8') as handle:
        content = json.load(handle)
    return content

# Dictionary data, read from JSON files addressed relative to the current
# working directory. A file that does not exist yields an empty dict
# (see load_json), so the app starts even without the data.
#   INDEX_LEMAS   - looked up by normalized lemma in consultar_ddgp.
#   ENTRIES       - looked up by str(entry id) in format_entry_html; entries
#                   are read through their 'gword' and 'pdesc' keys.
#   ABREV         - abbreviation -> dict read through 'descricao'/'categoria'.
#   INDEX_FORMAS, FORMA_TO_LEMA - loaded here; not referenced by the code
#                   visible in this file section.
INDEX_LEMAS = load_json('ddgp_index_lemas.json')
INDEX_FORMAS = load_json('ddgp_index_formas_final.json')
FORMA_TO_LEMA = load_json('ddgp_forma_to_lema.json')
ENTRIES = load_json('ddgp3x_entry.json')
ABREV = load_json('abrev.json')

# Concatenate the optional local stylesheets (those that exist) into a single
# CSS string, each followed by a newline.
css_content = ""
for f_css in ['style.css', 'style_map.css']:
    if os.path.exists(f_css):
        with open(f_css, 'r', encoding='utf-8') as f:
            css_content += f.read() + "\n"

# Extra rules appended after the file-based CSS: larger fonts for the input
# box, the analysis tables and prose blocks.
css_content += """
/* Aumenta a fonte da caixa de entrada (textarea) */
textarea {
    font-size: 20px !important;
    line-height: 1.6 !important;
    font-family: 'Gentium Plus', 'Times New Roman', serif !important;
}

/* Aumenta a fonte das tabelas de análise */
table, th, td {
    font-size: 18px !important;
}

/* Ajusta especificamente o texto dentro das células da tabela */
.gr-table td {
    font-size: 18px !important;
    padding: 10px !important;
}

/* Se houver componentes de Markdown/HTML com grego */
.prose {
    font-size: 18px !important;
}
"""

# --- 2. FUNÇÕES DE APOIO E ORDENAÇÃO ---

def normalizar_grego(texto):
    """Return ``texto`` lower-cased, without combining marks, and trimmed.

    The text is lower-cased and decomposed (NFD), every combining character
    (accents, breathings, iota subscript, ...) is dropped, and the remainder
    is recomposed (NFC) and stripped of surrounding whitespace.
    Falsy input (``None`` or ``""``) yields ``""``.
    """
    if not texto:
        return ""
    decomposto = unicodedata.normalize('NFD', texto.lower())
    letras_base = [ch for ch in decomposto if unicodedata.combining(ch) == 0]
    recomposto = unicodedata.normalize('NFC', "".join(letras_base))
    return recomposto.strip()

def ordem_grega(lema):
    """Build a sort key that orders ``lema`` by Greek alphabetical order.

    The lemma is first normalized with ``normalizar_grego``; each resulting
    character is mapped to its 1-based position in the 24-letter alphabet
    (final sigma shares position 18 with sigma). Any other character maps
    to 99, i.e. it sorts after every Greek letter.

    Returns:
        list[int]: one position per character of the normalized lemma.
    """
    posicoes = {
        letra: indice
        for indice, letra in enumerate("αβγδεζηθικλμνξοπρστυφχψω", start=1)
    }
    posicoes['ς'] = posicoes['σ']

    chave = []
    for letra in normalizar_grego(lema):
        chave.append(posicoes.get(letra, 99))
    return chave

def aplicar_abreviaturas_seguro(texto, abrevs=None):
    """Wrap every known abbreviation found in ``texto`` in a ``<span>`` tooltip.

    Each match becomes ``<span class="CLS" title="DESC">ABBR</span>`` where
    ``DESC`` is the abbreviation's ``'descricao'`` and ``CLS`` is
    ``"autor-sc"`` when its ``'categoria'`` is ``'autor'``, else ``"abrev"``.
    An abbreviation matches at a word boundary and only when followed by
    whitespace, one of ``. , ; :`` or the end of the text.

    All abbreviations are applied in ONE regex pass (longest alternative
    first). Text inserted for one abbreviation -- in particular the
    ``title`` attribute -- is therefore never re-scanned and wrapped again
    by a shorter abbreviation, which would produce nested/broken markup.

    Args:
        texto: HTML/text fragment to annotate.
        abrevs: optional mapping ``abbreviation -> {'descricao', 'categoria'}``;
            defaults to the module-level ``ABREV`` table.

    Returns:
        The annotated text; ``""`` for falsy ``texto``.
    """
    import html

    if not texto:
        return ""

    tabela = ABREV if abrevs is None else abrevs
    # Empty keys would add an alternative that matches the empty string.
    ordenadas = sorted((ab for ab in tabela if ab), key=len, reverse=True)
    if not ordenadas:
        return texto

    padrao = re.compile(
        r'\b(?:' + '|'.join(re.escape(ab) for ab in ordenadas) + r')(?=\s|[.,;:]|$)'
    )

    def _marcar(match):
        ab = match.group(0)
        info = tabela[ab]
        # Escape the description so quotes, '<' or '&' cannot break the attribute.
        desc = html.escape(info.get('descricao', ''), quote=True)
        classe_css = "autor-sc" if info.get('categoria', '') == 'autor' else "abrev"
        return f'<span class="{classe_css}" title="{desc}">{ab}</span>'

    # A function replacement is inserted literally: backslashes in the data
    # are not interpreted as group references, unlike a template string.
    return padrao.sub(_marcar, texto)

def format_entry_html(entry_id):
    """Render the dictionary entry ``entry_id`` as an HTML result box.

    The entry is fetched from ``ENTRIES`` under ``str(entry_id)``. Its
    ``'pdesc'`` text gets abbreviation tooltips, and every segment enclosed
    in 〈…〉 is wrapped in a span with the ``etimo`` CSS class.

    Returns:
        The HTML string, or ``None`` when the entry is missing or empty.
    """
    registro = ENTRIES.get(str(entry_id))
    if not registro:
        return None

    gword = registro.get('gword', '')
    pdesc = aplicar_abreviaturas_seguro(registro.get('pdesc', ''))
    pdesc = re.sub(r'〈(.*?)〉', r'<span class="etimo">〈\1〉</span>', pdesc)

    return f"""
    <div class="result-box" style="text-transform: none !important; font-variant: normal !important;">
        <div style="color: #1a4d8f; font-size: 1.3em; font-weight: bold; margin-bottom: 6px; text-transform: none !important; font-variant: normal !important;">
            {gword}
        </div>
        <div style="line-height: 1.6; text-transform: none !important; font-variant: normal !important;">
            {pdesc}
        </div>
    </div>
    """

# --- 3. CONSULTA E ANÁLISE ---

def consultar_ddgp(termo):
    """Look ``termo`` up in the DDGP lemma index and return the entries as HTML.

    Input containing at least one ASCII letter is treated as a Latin
    transliteration and converted with ``latin_to_basic_grc`` first. The
    lookup is strict: only the normalized term itself and the same term
    followed by 1, 2 or 3 are tried, so words that merely start with the
    term are not returned.

    Returns:
        The concatenated HTML of all matching entries (in sorted id order),
        or ``""`` when the term is empty or nothing matches.
    """
    if not termo:
        return ""

    tem_letra_ascii = any(c.isalpha() and ord(c) < 128 for c in termo)
    if tem_letra_ascii:
        termo = latin_to_basic_grc(termo)

    chave = normalizar_grego(termo)

    # Exact key plus its numbered variants (chave1, chave2, chave3).
    candidatas = [chave, *(f"{chave}{n}" for n in range(1, 4))]
    encontrados = [INDEX_LEMAS[c] for c in candidatas if c in INDEX_LEMAS]
    if not encontrados:
        return ""

    fragmentos = (format_entry_html(eid) for eid in sorted(set(encontrados)))
    return "".join(frag for frag in fragmentos if frag)

def analisar_texto(texto):
    """Run the NLP pipeline on ``texto`` and build every output of the analysis tab.

    For each token the surface form, lemma, (Portuguese) part of speech and a
    compact morphology string are collected. Each distinct normalized lemma
    of a non-punctuation token is looked up once in the dictionary; the
    entries found are shown in Greek alphabetical order below a fixed hint.

    Returns:
        An 8-tuple matching the Gradio outputs:
        ``(table, csv_path, n_tokens, n_types, n_lemas, ltr, ttr, lexicon_html)``
        where ``table`` is the first 100 rows of the token DataFrame,
        ``csv_path`` points to a CSV export of the full table, the counts are
        strings, and ``ltr`` / ``ttr`` are the lemma-token and type-token
        ratios formatted with two decimals.
        For falsy ``texto`` it is ``(None, None, "0", "0", "0", "0", "0", "")``.
    """
    if not texto:
        return None, None, "0", "0", "0", "0", "0", ""

    import tempfile

    colunas = ['Palavra', 'Lema', 'Classe', 'Morfologia']
    # Feature-name prefixes removed from the morphology string, leaving values only.
    prefixos_morph = ("Case=", "Gender=", "Number=", "VerbForm=",
                      "Person=", "Tense=", "Mood=", "Voice=")
    pos_sem_verbete = ('PUNCT', 'SYM', 'SPACE')

    doc = nlp(texto)
    dados = []
    lemas_unicos_processados = set()
    verbetes_dict = {}

    for token in doc:
        l_orig = token.lemma_
        l_norm = normalizar_grego(l_orig)
        pos_pt = TAG_MAP.get(token.pos_, token.pos_)

        morph_info = str(token.morph)
        for prefixo in prefixos_morph:
            morph_info = morph_info.replace(prefixo, "")
        if not morph_info:
            morph_info = "-"

        # Query the dictionary only once per normalized lemma.
        if token.pos_ not in pos_sem_verbete and l_norm not in lemas_unicos_processados:
            res_html = consultar_ddgp(l_norm)
            if res_html:
                verbetes_dict[l_orig] = res_html
            lemas_unicos_processados.add(l_norm)

        dados.append({
            'Palavra': token.text,
            'Lema': l_orig,
            'Classe': pos_pt,
            'Morfologia': morph_info
        })

    # Hint shown above the contextual lexicon.
    aviso_html = """
    <div style="background-color: #f8f9fa; padding: 10px; border-left: 4px solid #1a4d8f; margin-bottom: 15px; font-size: 0.9em; color: #555;">
        💡 <b>Dica:</b> Caso um lema não apareça automaticamente abaixo, utilize a aba "Busca direta no DDGP" para consultá-lo manualmente.
    </div>
    """

    lexico_html = aviso_html + "".join(
        verbetes_dict[lema] for lema in sorted(verbetes_dict, key=ordem_grega)
    )

    # Explicit columns keep the frame well-formed even if no rows were produced.
    df = pd.DataFrame(dados, columns=colunas)
    tokens_df = df[~df['Classe'].isin(['Pontuação', 'PUNCT', 'SYM', 'SPACE'])]
    n_tokens = len(tokens_df)
    n_types = tokens_df['Palavra'].str.lower().nunique()
    n_lemas = tokens_df['Lema'].nunique()

    ttr = (n_types / n_tokens) if n_tokens > 0 else 0
    ltr = (n_lemas / n_tokens) if n_tokens > 0 else 0

    # One private directory per call: with a single fixed file in the working
    # directory, concurrent requests would overwrite each other's export.
    # The download keeps its familiar file name.
    # NOTE(review): these temp directories are not cleaned up here; confirm
    # that is acceptable for the deployment (e.g. ephemeral container storage).
    csv_path = os.path.join(tempfile.mkdtemp(prefix="ddgp_"), "analise_filologica.csv")
    df.to_csv(csv_path, index=False)

    return (df.head(100), csv_path,
            str(n_tokens), str(n_types), str(n_lemas),
            f"{ltr:.2f}", f"{ttr:.2f}", lexico_html)

# --- 4. INTERFACE ---
with gr.Blocks(title="DDGP + OdyCy") as demo:
    # Header row: logo on the left, titles on the right.
    with gr.Row():
        with gr.Column(scale=1, min_width=100):
            gr.HTML("""
                <div style="display: flex; align-items: center; justify-content: flex-start; height: 80px;">
                    <img src="https://raw.githubusercontent.com/aniseferreira/DDGP_Plus/main/ddgp/logo.png" style="height: 80px;">
                </div>
            """)
        with gr.Column(scale=4):
            gr.Markdown("# Estação Filológica DDGP & OdyCy")
            gr.Markdown("## DDGP Plus: Análise lexical e consulta ao Dicionário Digital Grego-Português.")

    # Tab 1: lexical analysis of a Greek text.
    with gr.Tab("📝 Análise lexical"):
        txt = gr.Textbox(label="Texto em Grego Antigo", lines=6, placeholder="Insira o texto aqui sem aspas...Δειναὶ γὰρ αἱ γυναῖκες εὑρίσκειν τέχνας.")
        btn = gr.Button("🚀 Executar Análise", variant="primary")

        with gr.Row():
            t1 = gr.Label(label="Tokens (Total)")
            t2 = gr.Label(label="Types (Formas Únicas)")
            t3 = gr.Label(label="Lemas (Entradas)")
            t4 = gr.Label(label="LTR (Lema-Token)")
            t5 = gr.Label(label="TTR (Type-Token)")

        with gr.Row():
            with gr.Column(scale=2):
                out_t = gr.Dataframe(label="Formas")
                out_f = gr.File(label="Exportar CSV")
            with gr.Column(scale=1):
                gr.Markdown("### 📖 Léxico Contextual")
                out_l = gr.HTML()

    # Tab 2: direct dictionary lookup.
    with gr.Tab("🔍 Busca direta no DDGP"):
        in_b = gr.Textbox(label="Busca direta no DDGP", placeholder="(ex. λόγος, logos)")
        btn_b = gr.Button("Consultar Base")
        out_b = gr.HTML()

    # Footer / credits. The trailing double spaces are Markdown line breaks.
    gr.Markdown("""
    ---
    **DDGP Plus** — Analisador Morfológico e Dicionário Digital de Grego–Português 2026 v.1  
    Baseado originalmente no Dicionário Grego-Português e diretamente no Dicionário Digital Grego–Português (DDGP e DGP - ver créditos em [hipatia.fclar.unesp.br](http://hipatia.fclar.unesp.br))  
    Projetos Abertos em Letras Clássicas Digitais. Responsável: **Anise D'Orange Ferreira**. 
    *Desenvolvimento técnico e programação assistida por Gemini (Google AI).*  
    Sob licença CC BY-NC-SA 4.0.  
    """)

    btn.click(analisar_texto, inputs=txt,
              outputs=[out_t, out_f, t1, t2, t3, t4, t5, out_l],
              api_name="analisar")

    btn_b.click(consultar_ddgp, inputs=in_b, outputs=out_b,
                api_name="consultar")

    def carregar_e_analisar(request: gr.Request):
        """Return the ``text`` query parameter of the page URL ("" when absent)."""
        return request.query_params.get("text", "")

    # On page load: fill the text box from the URL, then run the analysis on
    # whatever the box holds (an empty box just yields the "0" placeholders).
    demo.load(carregar_e_analisar, None, txt).then(
        fn=analisar_texto,
        inputs=txt,
        outputs=[out_t, out_f, t1, t2, t3, t4, t5, out_l]
    )

# Only real CSS belongs in the stylesheet string. The Alpheios snippet is an
# HTML <link> element, so it is passed through `head` and not appended here.
full_css = css_content

# Launch after the Blocks context has been closed.
# NOTE(review): css/head/js are accepted by launch() only in recent Gradio
# releases (they were gr.Blocks(...) arguments before) -- confirm the Gradio
# version pinned for this Space.
demo.launch(
    css=full_css,
    head=alpheios_css,  # Alpheios needs its stylesheet <link> in the page head
    js=alpheios_js,
    ssr_mode=False,  # server-side rendering disabled
)