import math
import numpy as np
import streamlit as st
from tomlkit import datetime
from engine import AnalyzerEngine
import pandas as pd
# NOTE: this import rebinds ``datetime`` (previously taken from tomlkit) to the
# standard-library module, so the import order above must be preserved.
import datetime
import plotly.graph_objects as go
import os

# File holding the persistent visit counter.
COUNTER_FILE = "/data/visits.txt"


def get_and_increment_visits():
    """Read the visit counter from COUNTER_FILE, add one, store it and return it.

    Returns:
        The updated visit count as an ``int``, or the string
        ``"Non abilitato"`` when the ``/data`` directory does not exist.
    """
    # Without the persistent folder the counter is disabled.
    if not os.path.exists("/data"):
        return "Non abilitato"

    # Start from zero unless the file holds a purely numeric value.
    visits = 0
    if os.path.exists(COUNTER_FILE):
        with open(COUNTER_FILE, "r") as f:
            content = f.read().strip()
        if content.isdigit():
            visits = int(content)

    # Increment and persist.
    visits += 1
    with open(COUNTER_FILE, "w") as f:
        f.write(str(visits))
    return visits


# Run the increment only once per session.
if 'visit_counted' not in st.session_state:
    st.session_state.total_visits = get_and_increment_visits()
    st.session_state.visit_counted = True

# 1. Page configuration
st.set_page_config(page_title="Co\u00B3 Suite", layout="wide")

st.markdown(""" """, unsafe_allow_html=True)


# 2. Engine loading, cached as a Streamlit resource
@st.cache_resource
def load_full_engine():
    """Build the AnalyzerEngine instance (wrapped by ``st.cache_resource``)."""
    return AnalyzerEngine()


engine = load_full_engine()

# 3.
# Session-state initialisation
if 'res' not in st.session_state:
    st.session_state.res = None


def prepare_export_data(res, fig_res=None):
    """Flatten the analysis results into a long-format DataFrame for CSV export.

    Args:
        res: Result dictionary of the main analysis. The sub-dictionaries
            ``basic``, ``lexical``, ``syntax``, ``cohesion``, ``consecutio``
            and ``combined`` are read when present.
        fig_res: Optional dictionary with the figurative-language results; the
            keys ``mds_s``, ``mds_w`` and ``total`` are exported and default
            to 0 when ``fig_res`` is empty or a key is missing.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Categoria``, ``Metrica``
        and ``Valore``, one row per exported metric.
    """
    # NumPy scalars are not instances of int/float (except np.float64), so they
    # are listed explicitly; otherwise such metrics would be silently dropped.
    scalar_types = (int, float, str, np.integer, np.floating)
    # Bulky entries (sentence texts, CoNLL dumps, the parsed doc) are skipped.
    skipped_keys = ("texts", "conll", "doc")

    # Mapping of the main result dictionaries (Tabs 1-4 and combined metrics).
    categories = {
        "Statistiche Base": res.get("basic", {}),
        "Lessico": res.get("lexical", {}),
        "Sintassi e Verbi": res.get("syntax", {}),
        "Coesione": res.get("cohesion", {}),
        "Consecutio": res.get("consecutio", {}),
        "Combined": res.get("combined", {}),
    }

    rows = []
    for cat_name, dict_data in categories.items():
        for key, value in dict_data.items():
            if key in skipped_keys or not isinstance(value, scalar_types):
                continue
            rows.append({"Categoria": cat_name, "Metrica": key, "Valore": value})

    # Connectors live in a nested dictionary under "cohesion" (Tab 4).
    connectors = res.get("cohesion", {}).get("connectors", {})
    for conn_type, val in connectors.items():
        rows.append({"Categoria": "Connettori", "Metrica": f"Conn_{conn_type}", "Valore": val})

    # Figurative-language (BERT) metrics (Tab 5).
    for m in ("mds_s", "mds_w", "total"):
        val = fig_res.get(m, 0) if fig_res else 0
        rows.append({"Categoria": "Figuratività (BERT)", "Metrica": m, "Valore": val})

    return pd.DataFrame(rows)


def clear_all():
    """Reset the input text and every result derived from a previous analysis."""
    st.session_state.res = None
    st.session_state.input_text = ""
    # Drop derived artefacts too, so results of an old text cannot resurface
    # after the next analysis.
    for stale_key in ("nlp_doc", "fig_results_data", "tab6_results"):
        st.session_state.pop(stale_key, None)


st.title("📝 Co\u00B3 Suite")
st.caption("Hybrid Text Analysis Dashboard with parameters for Coherence, Cohesion and Complexity evaluation powered by a custom-built engine and BERT.")

lang_choice = st.radio("Language:", ["English", "Italiano"], horizontal=True)
st.session_state.lang = "it" if lang_choice == "Italiano" else "en"

# 4.
# Input area
uploaded_file = st.file_uploader("Upload a text file (.txt)", type=("txt"))

if uploaded_file is not None:
    # Read the file content; undecodable bytes are replaced instead of
    # crashing the app on files that are not valid UTF-8.
    stringio = uploaded_file.getvalue().decode("utf-8", errors="replace")
    # Update the session state so the text shows up in the text area.
    # The text is truncated because the text area is capped at 30,000 chars.
    st.session_state.input_text = stringio[:30000]
    # Strip only the trailing extension from the file name.
    st.session_state.source_name = os.path.splitext(uploaded_file.name)[0]
elif st.session_state.get('input_text', "").strip():
    # No file: if the text area holds some text, label it as custom text.
    st.session_state.source_name = "CustomText"

txt = st.text_area("Insert Text (max 30,000 characters):", height=200, max_chars=30000, key="input_text")
input_final = st.session_state.input_text if uploaded_file is None else stringio

col_btn1, col_btn2, _ = st.columns([1, 1, 4])
with col_btn1:
    if st.button("Start Analysis", type="primary"):
        # NOTE(review): an over-long upload is reported here, yet the analysis
        # still runs on the truncated text stored in the session state.
        # Confirm whether the run should be blocked instead.
        if len(input_final) > 30000:
            st.error(f"The text is too long ({len(input_final)} characters). The maximum limit is 30,000.")
        if st.session_state.input_text.strip():
            # Results computed for a previous text are no longer valid.
            st.session_state.pop("tab6_results", None)
            st.session_state.pop("fig_results_data", None)
            with st.spinner("Analyzing..."):
                full_res = engine.run(st.session_state.input_text, lang=st.session_state.lang)
            # Store the textual results.
            st.session_state.res = full_res
            # Store the DOC object separately for the BERT module.
            st.session_state.nlp_doc = full_res["doc"]
        else:
            st.warning("Please insert some text.")
with col_btn2:
    st.button("Clean Fields", on_click=clear_all)

# 5.
# Display of the 34 outputs
if st.session_state.res:
    r = st.session_state.res
    b, l, s, c, d = r["basic"], r["lexical"], r["syntax"], r["cohesion"], r['consecutio']

    t1, t2, t3, t4, t5, t6, t7 = st.tabs([
        "📊 Basical Statistics",
        "📚 Lexical features ",
        "🏗️ Sintax & Verbs",
        "🧠 Cohesion & Connectives",
        "🎭 Figurative Language",
        "📈 Combined Metrics",
        "📖 ILA Index",
    ])

    # --- TAB 1: BASIC STATISTICS & RICHNESS ---
    with t1:
        st.subheader("Quantitative Basics")
        c1, c2, c3, c4 = st.columns(4)
        c1.metric("Total Tokens", b["tokens"])
        c2.metric("Total Sentences", b["sentences"])
        c3.metric("Total Paragraphs", b["paragraphs"])
        c4.metric("Hapax Legomena", l.get("hapax", 0), help="Number of words that appear only once in the text.")
        c1.metric("Hapax Ratio", l.get("hapax_ratio", 0))
        c2.metric("Type-Token Ratio (TTR)", l.get("TTR", 0), help="Ratio of unique words (types) to total words (tokens).")
        c3.metric("Gunning Fog Index", l.get("gunning_fog", 0), help="An estimate of the years of formal education needed to understand the text on a first reading. This metric is based on the average sentence length and the percentage of complex words (words with three or more syllables).")
        c4.metric("HD-D Index", l.get("HD-D", 0), help="A measure of lexical diversity that accounts for the frequency of word usage. It is calculated as the ratio of the number of unique words (types) to the total number of words (tokens) raised to the power of 0.5. A higher HD-D index indicates greater lexical diversity, while a lower index suggests more repetition in word usage.")

        st.divider()
        st.subheader("Lexical Range Metrics and Concreteness")
        col_r1, col_r2, col_r3, col_r4 = st.columns(4)
        col_r1.metric("Lexical Range 1", l.get("r1", 0), help="Lexical Range 1: Percentage of words that are among the 1000 most common words in the language according to the dictionaries of the Nation’s Range program. A lower percentage indicates a wider lexical range, while a higher percentage suggests a more limited vocabulary.")
        col_r2.metric("Lexical Range 2", l.get("r2", 0), help="Lexical Range 2: Percentage of words that are among the 2000 most common words in the language according to the dictionaries of the Nation’s Range program. ")
        col_r3.metric("Lexical Range 3", l.get("r3", 0), help="Lexical Range 3: Percentage of words percentage of words belonging to \"The Academic Word List\"")
        col_r4.metric("Concreteness (MRC)", round(l.get("concreteness", 0), 2), help="Average concreteness rating of the words in the text based on the MRC Psycholinguistic Database. Concreteness ratings range from 100 (very abstract) to 700 (very concrete). A higher average concreteness score indicates that the text contains more concrete and tangible words, while a lower score suggests a more abstract vocabulary.")

        st.divider()
        st.subheader("🔗 Bigrams & Trigrams (PMI)")

        # Everything below needs the parsed doc stored by the main analysis,
        # so it is kept inside this guard (``current_doc`` is used throughout).
        if "nlp_doc" in st.session_state:
            current_doc = st.session_state.nlp_doc
            all_lemmas = [t.lemma_.lower() for t in current_doc if not t.is_punct and not t.is_space]

            c_bi, c_tri = st.columns(2)
            with c_bi:
                df_bi = engine.freq_mod.get_pmi(all_lemmas, n=2)
                st.caption("Top Bigrams")
                st.dataframe(df_bi, use_container_width=True, hide_index=True)
                if not df_bi.empty:
                    st.download_button("📥 Download Bigrams", df_bi.to_csv(index=False).encode('utf-8'), "bigrams.csv", "text/csv", key="dl_bi")
            with c_tri:
                df_tri = engine.freq_mod.get_pmi(all_lemmas, n=3)
                st.caption("Top Trigrams")
                st.dataframe(df_tri, use_container_width=True, hide_index=True)
                if not df_tri.empty:
                    st.download_button("📥 Download Trigrams", df_tri.to_csv(index=False).encode('utf-8'), "trigrams.csv", "text/csv", key="dl_tri")

            st.divider()
            st.subheader("📈 Frequencies & TF-IDF Trends")

            # UI controls for splitting the text into chunks.
            col_split1, col_split2 = st.columns([1, 3])
            split_mode = col_split1.radio("Split Method:", ["numeric", "regex"])
            if split_mode == "numeric":
                split_val = col_split2.number_input("Number of parts:", min_value=2, max_value=20, value=5)
            else:
                split_val = col_split2.text_input("Regex pattern (e.g. \\n\\n for paragraphs):", value="\n\n")

            # Trends are computed on the fly at every rerun.
            chunks = engine.freq_mod.chunk_doc(st.session_state.input_text, current_doc, mode=split_mode, val=split_val)
            df_freq, df_tfidf = engine.freq_mod.get_trends(chunks)

            if not df_freq.empty:
                # The most frequent words are pre-selected.
                top_overall = df_freq.groupby("Word")["Count"].sum().nlargest(5).index.tolist()
                selected_words = st.multiselect("Select words to plot:", options=df_freq["Word"].unique(), default=top_overall)

                if selected_words:
                    plot_data_freq = df_freq[df_freq["Word"].isin(selected_words)]
                    plot_data_tfidf = df_tfidf[df_tfidf["Word"].isin(selected_words)]

                    # Absolute-frequency chart.
                    fig_f = go.Figure()
                    for word in selected_words:
                        w_data = plot_data_freq[plot_data_freq["Word"] == word]
                        fig_f.add_trace(go.Scatter(x=w_data["Part"], y=w_data["Count"], mode='lines+markers', name=word))
                    fig_f.update_layout(title="Absolute Frequencies per Chunk", xaxis_title="Chunk", yaxis_title="Count")

                    # TF-IDF chart.
                    fig_t = go.Figure()
                    for word in selected_words:
                        w_data = plot_data_tfidf[plot_data_tfidf["Word"] == word]
                        fig_t.add_trace(go.Scatter(x=w_data["Part"], y=w_data["TF-IDF"], mode='lines+markers', name=word, line=dict(dash='dot')))
                    fig_t.update_layout(title="TF-IDF per Chunk", xaxis_title="Chunk", yaxis_title="TF-IDF Score")

                    st.plotly_chart(fig_f, use_container_width=True)
                    st.plotly_chart(fig_t, use_container_width=True)

                    # Download buttons for the complete data.
                    st.divider()
                    col_dl1, col_dl2 = st.columns(2)
                    col_dl1.download_button(
                        "📥 Download Complete Frequencies",
                        df_freq.to_csv(index=False).encode('utf-8'),
                        "frequecies.csv",
                        "text/csv",
                        key="dl_freq"
                    )
                    col_dl2.download_button(
                        "📥 Scarica TF-IDF Completo",
                        df_tfidf.to_csv(index=False).encode('utf-8'),
                        "tfidf.csv",
                        "text/csv",
                        key="dl_tfidf"
                    )
                else:
                    st.info("Select at least one word to display the charts.")

    # --- TAB 2: LEXICON & DEIXIS ---
    with t2:
        st.subheader("Pronouns and Nouns")
        l1, l2, l3, l4 = st.columns(4)
        l1.metric("Pronouns", l.get("pronouns", 0))
        l2.metric("Nouns", l.get("nouns", 0))
        l3.metric("Ratio Pronouns/Nouns", l.get("pron_noun_ratio", 0))
        l4.metric("First Person Pronouns", l.get("first_person_ratio", 0))
        l1.metric("Modifiers per Noun", s.get("mod_per_noun", 0), help="Average number of modifiers (adjectives, relative clauses, etc.) per noun in the text. A higher value indicates a more descriptive and detailed use of nouns, while a lower value suggests a simpler noun usage.")

        st.divider()
        st.subheader("Adjectives and Emphatic particles")
        d1, d2, d3, d4 = st.columns(4)
        d1.metric("Deictics", l.get("deictics", 0))
        d2.metric("Adjectives", s.get("adj_count", 0))
        d3.metric("Adj per Sentence", s.get("adj_x_sent", 0))
        d4.metric("Emphatic Particles", l.get("emphatic_particles", 0))
        d1.metric("Deictic/Articles", l.get("deictic_Frequency", 0))
        d2.metric("attributive/Adj Ratio", s.get("attr_adjs_ratio", 0))
        d3.metric("attributive Adj Frequency", s.get("attr_adjs_freq", 0))

        st.divider()
        st.subheader("Articles")
        j1, j2, j3, j4 = st.columns(4)
        j1.metric("Articles", l.get("articles", 0))
        j2.metric("Definite Articles", l.get("definite_articles", 0))
        j3.metric("Demonstratives articles", l.get("demonstratives", 0))
        j3.metric("Demonstratives per sentence", l.get("demonstratives_ratio", 0))
        j2.metric("Definite articles per sentence", l.get("definite_articles_ratio", 0))

    # --- TAB 3: SYNTAX & VERBS ---
    with t3:
        st.subheader("Syntactic Measures")
        s1, s2, s3, s4 = st.columns(4)
        s1.metric("Average Sentence Length", s.get("avg_sent_len", 0))
        s2.metric("Subordinate per Sentence", s.get("sub_ratio", 0))
        s3.metric("Relatives per Sentence", s.get("rel_clauses_per_sent", 0))
        s4.metric("Distance from Root", s.get("root_dist", 0), help="Average distance from the root of the dependency tree to the other nodes. A higher value indicates a more complex syntactic structure, while a lower value suggests a simpler structure.")
        s1.metric("Hypotactic depth", d.get("avg_depth", 0), help="Average depth of the dependency tree.")
        # The fallback must be numeric: round() raises TypeError on a dict.
        s2.metric("sentence depth variance", round(d.get("sentence_depths", 0), 4), help="Variance of the depths of the dependency trees across sentences. A higher variance indicates greater variability in sentence complexity, while a lower variance suggests more uniformity in sentence structure.")
        s1.metric("Punctuation Pairs per Sentence", s.get("punct_pairs_per_sent", 0), help="Number of punctuation pairs (e.g., parentheses, quotes) per sentence.")
        s2.metric("Subj-Verb_Obj Inversions per sentence", s.get("svo_inversions_per_sent", 0), help="Number of subj-verb-obj inversions per sentence.")
        s3.metric("Number of Subordinate Clauses (completive excluded)", s.get("non_comp_sub_per_sent", 0), help="Number of non-completive subordinate clauses per sentence.")

        st.divider()
        st.subheader("Verbs & Tenses")
        v1, v2, v3, v4 = st.columns(4)
        v1.metric("Ratio Present/Verbs", s.get("present_ratio", 0))
        v2.metric("Ratio Past/Verbs", s.get("past_ratio", 0))
        v3.metric("Ratio Participles/Verbs", s.get("participle_ratio", 0))
        v4.metric("Consecutio Index", d.get("consecutio_index", 0), help="Index measuring the sequential relationship between clauses.")
        v1.metric("Temporal Stability", f"{d.get('tense_stability', 0)}", help="Percentage of verbs that maintain the same tense across the text. A higher percentage indicates greater temporal stability, while a lower percentage suggests more frequent tense shifts.")
        v2.metric("Verbal Density", f"{d.get('verb_density', 0)}", help="Percentage of words that are verbs in the text. A higher percentage indicates a more verb-heavy text, while a lower percentage suggests a less verb-heavy text.")

        st.divider()
        st.subheader("Dependency Tree Inspection (CoNLL-U)")
        # Sentence selector for the CoNLL-U view.
        idx = st.selectbox(
            "Select a sentence to inspect:",
            range(len(b["texts"])),
            format_func=lambda i: f"Frase {i+1}: {b['texts'][i][:70]}..."
        )
        # Text area showing the CoNLL-U content produced by the engine.
        st.text_area(
            label="CoNLL-U Format (Tab-Separated):",
            value=b["conll"][idx],
            height=300
        )

    # --- TAB 4: COHESION & CONNECTIVES ---
    with t4:
        st.subheader("Textual Cohesion Metrics")
        m1, m2, m3, m4 = st.columns(4)
        m1.metric("Lemma Overlap (adjacent)", f"{c.get('lexical_cohesion_local', 0)*100:.2f}%", help="Percentage of lemmas that are shared between adjacent paragraphs. A higher percentage indicates stronger local cohesion, while a lower percentage suggests weaker local cohesion.")
        m2.metric("Lemma Overlap (3 paragraphs)", f"{c.get('lexical_cohesion_global', 0)*100:.2f}%", help="Percentage of lemmas that are shared between sentences in different paragraphs. ")
        m3.metric("Semantic Overlap (Sentences)", f"{c.get('semantic_cohesion_sentences', 0)*100:.2f}%", help="Percentage of semantic relationships that are shared between adjacent sentences. This metric is calculated by using BERT to identify similarities in meaning between sentences. ")
        m4.metric("Semantic Overlap (Paragraphs)", f"{c.get('semantic_cohesion_paragraphs', 0)*100:.2f}%", help="Percentage of semantic relationships that are shared between sentences in different paragraphs. ")

        st.divider()
        st.subheader("Connector Frequency (Normalized)")
        # Mapping used to display all 8 connector types.
        conn_data = c.get("connectors", {})
        col_c1, col_c2 = st.columns(2)
        with col_c1:
            st.bar_chart(pd.Series({
                "Adictives +": conn_data.get("AdPos", 0),
                "Adictives -": conn_data.get("AdNeg", 0),
                "Causals +": conn_data.get("CausPos", 0),
                "Causals -": conn_data.get("CausNeg", 0)
            }))
        with col_c2:
            st.bar_chart(pd.Series({
                "Temporals +": conn_data.get("TempPos", 0),
                "Temporals -": conn_data.get("TempNeg", 0),
                "Logics +": conn_data.get("LogPos", 0),
                "Logics -": conn_data.get("LogNeg", 0)
            }))
        st.info(f"Cohesion Value\n\n{c.get('general_cohesion', 0)}\n\nThe logarithm of the standard deviation, using the weighted sum of frequencies as the base.")

    # --- TAB 5: FIGURATIVE LANGUAGE ANALYSIS ---
    with t5:
        st.header("🎭 Figurative Language Analysis (BERT V5)", help="The analysis may take some time to complete. Large texts may require several minutes.")
        if "nlp_doc" not in st.session_state:
            st.warning("Before proceeding, run the general analysis in Tab 1.")
        else:
            # Three columns: launch button, sampling checkbox, sample-size slider.
            col_btn, col_chk, col_sld = st.columns([1.5, 1, 2], gap="medium")
            with col_chk:
                use_sampling = st.checkbox("Sample mode", value=True, help="Analyze only a random portion of the text.")
            with col_sld:
                sample_rate = 1.0
                if use_sampling:
                    sample_rate = st.slider("Sample Size %", 5, 95, 10, 5) / 100
            with col_btn:
                launch = st.button("Launch Metaphor Detector", type="secondary", use_container_width=True)

            if launch:
                with st.spinner("BERT is executing the Masked Language Modeling..."):
                    lang = st.session_state.get("lang", "en")
                    res = engine.fig_mod.analyze(st.session_state.nlp_doc, sample_rate=sample_rate, lang=lang)
                    st.session_state.fig_results_data = res

            if "fig_results_data" in st.session_state:
                res = st.session_state.fig_results_data
                # The header and the metric row are rendered once (they used to
                # be emitted twice, duplicating the MDS-S metric on screen).
                st.subheader("MDS indices (Metaphor Density Score)")
                c1, c2, c3 = st.columns(3)
                c1.metric("MDS-S (per Sentence)", round(res["mds_s"], 4), help="Metaphor Density Score per Sentence: This metric calculates the average number of metaphors per sentence in the text. Metaphor detection scans syntactic pairs (Subj-Verb, Obj-Verb, Noun-Adj) by masking terms and using BERT to predict contextual expectations; it flags a metaphor when the semantic similarity between the original word (neutralized via person/thing placeholders) and BERT's top candidates falls below a 0.90 threshold.")
                c2.metric("MDS-W (per 1k Words)", round(res["mds_w"], 2), help="This metric calculates the average number of metaphors per 1,000 words in the text.")
                c3.metric("Total Metaphors", res["total"], help="Total number of metaphors detected in the text.")
                if res.get("is_sample"):
                    # NOTE(review): this shows the slider's current value, which
                    # may differ from the rate used when the analysis was run.
                    st.caption(f"⚠️ Note: These scores are estimated based on a {sample_rate*100:.0f}% random sample of the text.")

                st.divider()
                st.subheader("🔍 Retrieved semantic anomalies")
                if not res["detections"]:
                    st.info("No semantic anomalies detected with the current thresholds.")
                else:
                    # ``det`` (not ``d``) so the consecutio dict is not overwritten.
                    for det in res["detections"]:
                        # Colour of the box depends on the probability.
                        color = "red" if det['probability'] > 85 else "orange" if det['probability'] > 70 else "blue"
                        with st.expander(f":{color}[{det['term']} ↔ {det['head']}] - Probabilità: {det['probability']}%"):
                            st.write(f"**Context:** _{det['sentence']}_")
                            st.caption(f"Logic: {det['reason']} | s1: {det['s1']} | s2: {det['s2']}")

    # --- TAB 6: COMBINED METRICS ---
    with t6:
        st.subheader("📈 Combined Metrics & Positioning")
        if st.session_state.get("lang", "en") == "it":
            st.warning(
                "⚠️ **Combined Metrics not available for Italian Language.**\n\n"
                "The reference database is in English. "
                "The Italian module is under developement."
            )
        elif st.session_state.res:
            fig_results = st.session_state.get("fig_results_data", None)
            if not fig_results:
                st.info("💡 **Caution:** the figurative metrics (BERT) have not been calculated. The model will automatically exclude them from comparison with historical texts.")

            # Launch button.
            if st.button("Calculate Positioning and Quality Score", type="primary"):
                with st.spinner("Clustering and Correlation Calculation in Progress..."):
                    try:
                        source_name = st.session_state.get("source_name", "CustomText")
                        # The engine call returns three values.
                        q_score, fig_clustering, classe_assegnata = engine.run_combined_analysis(
                            res=st.session_state.res,
                            fig_res=fig_results,
                            source_name=source_name,
                        )
                        # Keep everything in the session state across reruns.
                        st.session_state.tab6_results = (q_score, fig_clustering, classe_assegnata)
                    except FileNotFoundError:
                        st.warning("Historical database ('database_completo_largo.pkl') not found.")
                    except Exception as e:
                        st.error(f"Error occurred during combined analysis: {e}")

            # Show the results.
            if "tab6_results" in st.session_state:
                q_score, fig_clustering, classe_assegnata = st.session_state.tab6_results

                col_score, col_desc = st.columns([1, 2])
                with col_score:
                    st.metric("Quality Score", f"{q_score:.3f}")
                with col_desc:
                    # Bands: >=3 excellent, >=2 good, >=0.5 fair, below 0.5 poor.
                    # The former lower bound of -15 made the "Poor" band
                    # unreachable for any score inside the documented 0-4 range.
                    if q_score >= 3:
                        st.success("🌟 Excellent!")
                    elif q_score >= 2:
                        st.info("📊 Good: Balanced style, tending towards quality works.")
                    elif q_score >= 0.5:
                        st.warning("⚠️ Fair: Common traits with consumer or debut literature.")
                    else:
                        st.error("📉 Poor: Stylistically close to amateur or basic works.")

                with st.expander("ℹ️ How is this score interpreted?"):
                    st.markdown(
                        "The score is calculated by measuring the stylistic correlation (Pearson) of your text against 4 predefined classes of works, penalizing similarity to amateur texts.\n\n"
                        "* **theoric range**: from **0** to **4**\n"
                        "* **Towards 3.0**: Maximum affinity with Great Classics/Masterpieces and Great Bestsellers.\n"
                        "* **Around 2.0**: Neutral or hybrid style.\n"
                        "* **Towards 0.0**: Maximum affinity with Amateur Works and Basic Genre Literature.\n"
                    )

                # The predicted class goes in an info box, which wraps long text.
                st.info(f"🏷️ **Predicted Class:**\n\n{classe_assegnata}")
                st.divider()
                st.plotly_chart(fig_clustering, use_container_width=True)
        else:
            st.info("Perform the basic analysis in Tab 1 first.")

    # --- TAB 7: ILA INDEX (Readability) ---
    with t7:
        st.subheader("📖 Index of Automatic Readability (ILA)")

        # 1. Input data
        lr1 = l.get("r1", 0)
        lr2 = l.get("r2", 0)
        lr3 = l.get("r3", 0)
        punct = s.get("punct_pairs_per_sent", 0)
        svo = s.get("svo_inversions_per_sent", 0)
        non_comp = s.get("non_comp_sub_per_sent", 0)

        # 2. Lexical component. math.log is only called on a positive ratio:
        # math.log(0.0) raises ValueError, which happened when every lexical
        # range was 0.
        somma_lr = lr1 + lr2 + lr3
        comp_lessicale = 0.0
        if somma_lr > 0:
            weighted_ratio = ((lr1 * 2) + lr2 + lr3) / somma_lr
            if weighted_ratio > 0:
                comp_lessicale = math.log(weighted_ratio)

        # 3. Syntactic penalty
        comp_sintattica = (punct + svo + non_comp)

        # 4. Final ILA
        ila_score = comp_lessicale - comp_sintattica

        # 5. Base metrics
        col_il1, col_il2, col_il3 = st.columns(3)
        col_il1.metric("Lexical Readability", f"{comp_lessicale:.4f}", help="(Log((LR1 * 2) + LR2 + LR3) / Somma LR)")
        col_il2.metric("Sytactic Complexity", f"- {comp_sintattica:.4f}", help="Sum of: Punctuation Pairs per Sentence + SVO Inversions per Sentence + Non-completive Subordinate Clauses per Sentence")
        col_il3.metric("Final ILA Score", f"{ila_score:.4f}")
        st.info("💡 ** How to Interpret the ILA Index:** A higher value indicates greater readability and fluency. The score rewards the use of high-frequency vocabulary (LR1) and penalizes syntactic complexity (inversions, parentheses, and complex subordinate clauses).")

        st.divider()

        # --- 6. ILA COMPARISON CHART ---
        st.subheader("📊 Comparison of ILA with Reference Works")
        # Hard-coded reference values for the comparison works.
        # "<br>" is the Plotly line-break marker for two-line labels.
        books = [
            "Ulysses (Part III)<br>(Hard)",
            "The Hobbit<br>(Med. Hard)",
            "The Little Prince<br>(Med. Easy)",
            "Isodora Moon goes to School<br>(Easy)",
            "📍 Your Text",
        ]
        lex_vals = [0.63, 0.64, 0.65, 0.64, comp_lessicale]
        # Negative so the bars are drawn below zero.
        syn_vals = [-2.73, -1.29, -0.87, -0.53, -comp_sintattica]
        ila_vals = [-2.1, -0.65, -0.23, 0.11, ila_score]

        fig_ila = go.Figure()
        # Bars for the two components.
        fig_ila.add_trace(go.Bar(name='Lexical Readability (+)', x=books, y=lex_vals, marker_color='#00cc96'))
        fig_ila.add_trace(go.Bar(name='Sytactic Complexity (-)', x=books, y=syn_vals, marker_color='#ff4b4b'))
        # Line for the final score.
        fig_ila.add_trace(go.Scatter(
            name='ILA Score',
            x=books,
            y=ila_vals,
            mode='lines+markers+text',
            text=[f"{v:.2f}" for v in ila_vals],
            textposition="top center",
            textfont=dict(color="black", size=12),
            marker=dict(color='white', size=10, line=dict(color='black', width=2)),
            line=dict(color='black', width=2)
        ))
        # Chart formatting.
        fig_ila.update_layout(
            barmode='relative',  # stacks positives above and negatives below zero
            height=400,
            margin=dict(l=20, r=20, t=40, b=20),
            legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
        )
        st.plotly_chart(fig_ila, use_container_width=True)

        st.divider()

        # --- OTHER INDICES (gauges) ---
        st.subheader("📊 Comparison of Standard Indices")
        # HD-D and Gunning Fog are read from the lexical results.
        hdd = l.get("HD-D", 0)
        gunning_fog = l.get("gunning_fog", 0)

        # Gulpease, approximating letters as characters minus tokens.
        tokens_count = b.get("tokens", 1)
        sents_count = b.get("sentences", 1)
        chars_count = b.get("chars", 1)
        letters_count = chars_count - tokens_count
        gulpease = 89 + ((300 * sents_count) - (10 * letters_count)) / tokens_count if tokens_count > 0 else 0
        gulpease = max(0, min(100, gulpease))  # clamp to the 0-100 range

        # 1. GULPEASE (higher = easier)
        fig_gulp = go.Figure(go.Indicator(
            mode="gauge+number",
            value=gulpease,
            title={'text': "Gulpease<br>Higher = Easier"},
            gauge={
                'axis': {'range': [0, 100]},
                'bar': {'color': "rgba(0,0,0,0.5)"},
                'steps': [
                    {'range': [0, 40], 'color': "#ff4b4b"},    # hard (red)
                    {'range': [40, 60], 'color': "#ffa500"},   # medium (orange)
                    {'range': [60, 100], 'color': "#00cc96"}   # easy (green)
                ]}
        ))

        # 2. GUNNING FOG (higher = harder)
        fig_fog = go.Figure(go.Indicator(
            mode="gauge+number",
            value=gunning_fog,
            title={'text': "Gunning Fog<br>Higher = Harder"},
            gauge={
                'axis': {'range': [0, 25]},
                'bar': {'color': "rgba(0,0,0,0.5)"},
                'steps': [
                    {'range': [0, 9], 'color': "#00cc96"},     # easy (green)
                    {'range': [9, 14], 'color': "#ffa500"},    # medium (orange)
                    {'range': [14, 25], 'color': "#ff4b4b"}    # hard (red)
                ]}
        ))

        # 3. HD-D (higher = richer vocabulary)
        fig_hdd = go.Figure(go.Indicator(
            mode="gauge+number",
            value=hdd,
            title={'text': "Diversity (HD-D)<br>Higher = More Diverse"},
            gauge={
                'axis': {'range': [0, 50]},
                'bar': {'color': "rgba(0,0,0,0.5)"},
                'steps': [
                    {'range': [0, 30], 'color': "#ff4b4b"},    # repetitive (red)
                    {'range': [30, 40], 'color': "#ffa500"},   # normal (orange)
                    {'range': [40, 50], 'color': "#00cc96"}    # rich (green)
                ]}
        ))

        # Smaller margins so the three gauges fit on one row.
        for fig in [fig_gulp, fig_fog, fig_hdd]:
            fig.update_layout(height=260, margin=dict(l=20, r=20, t=90, b=20))

        cg1, cg2, cg3 = st.columns(3)
        with cg1:
            st.plotly_chart(fig_gulp, use_container_width=True)
        with cg2:
            st.plotly_chart(fig_fog, use_container_width=True)
        with cg3:
            st.plotly_chart(fig_hdd, use_container_width=True)

st.divider()

if st.session_state.res:
    # Figurative-language results, when available.
    fig_results = st.session_state.get("fig_results_data", None)

    # Build the export DataFrame and a timestamped file name.
    df_export = prepare_export_data(st.session_state.res, fig_results)
    base_name = st.session_state.get("source_name", "CustomText")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
    csv_name = f"Co3_{base_name}_{timestamp}.csv"
    csv_data = df_export.to_csv(index=False).encode('utf-8')

    st.download_button(
        label="📥 Export Results in CSV",
        data=csv_data,
        file_name=csv_name,
        mime='text/csv'
    )

visite = st.session_state.get("total_visits", 0)
# f-string: without the prefix the literal text "{visite}" was displayed.
st.caption(f"Co\u00B3 Suite | {visite}")