Spaces:

amaisto
/

CO3

Running

App Files Files Community

CO3 / src /app.py

amaisto

Upload app.py

57ae750 verified 13 days ago

raw

history blame contribute delete

35.5 kB

	import math
	import numpy as np
	import streamlit as st
	from tomlkit import datetime
	from engine import AnalyzerEngine
	import pandas as pd
	import datetime
	import plotly.graph_objects as go
	import os

	COUNTER_FILE = "/data/visits.txt"


	def get_and_increment_visits(counter_file=COUNTER_FILE):
	    """Increment the persistent visit counter and return the new total.

	    The counter is a plain-text file holding one non-negative integer.
	    Persistence is treated as disabled when the directory that should
	    contain the file does not exist.

	    Args:
	        counter_file: Path of the counter file. Defaults to COUNTER_FILE.

	    Returns:
	        The updated visit count (int), or the string "Non abilitato" when
	        the storage directory is missing or the file cannot be read/written.
	    """
	    storage_dir = os.path.dirname(counter_file) or "."
	    # Make sure the persistent folder exists.
	    if not os.path.exists(storage_dir):
	        return "Non abilitato"

	    try:
	        # Read the current value; a missing file means "no visits yet".
	        visits = 0
	        if os.path.exists(counter_file):
	            with open(counter_file, "r", encoding="utf-8", errors="replace") as f:
	                content = f.read().strip()
	            # str.isdigit() accepts characters such as "²" that int() rejects,
	            # so validate with isdecimal() and still guard the conversion.
	            try:
	                visits = int(content) if content.isdecimal() else 0
	            except ValueError:
	                visits = 0

	        # Increment and save.
	        visits += 1
	        with open(counter_file, "w", encoding="utf-8") as f:
	            f.write(str(visits))
	    except OSError:
	        # The counter is cosmetic: an unreadable or read-only volume must not
	        # take the whole app down at start-up.
	        return "Non abilitato"

	    return visits

	# Count the visit only once per session: the 'visit_counted' flag stored in
	# session_state keeps the counter from being bumped again while it is set.
	if 'visit_counted' not in st.session_state:
	st.session_state.total_visits = get_and_increment_visits()
	st.session_state.visit_counted = True
	# 1. Page configuration
	st.set_page_config(page_title="Co\u00B3 Suite", layout="wide")
	# Inject custom CSS: lets the tab list wrap onto several rows and keeps each
	# tab label on a single line (see the rules inside the <style> element).
	st.markdown("""
	<style>
	/* Forza il contenitore dei tab ad andare a capo in automatico */
	div[data-baseweb="tab-list"] {
	flex-wrap: wrap !important;
	gap: 8px; /* Spazio orizzontale e verticale tra i tab */
	}

	/* Aggiunge un po' di margine per staccare visivamente le righe */
	button[data-baseweb="tab"] {
	margin-bottom: 5px !important;
	white-space: nowrap !important; /* Evita che il testo dentro il singolo tab vada a capo */
	}
	</style>
	""", unsafe_allow_html=True)
	# 2. Engine loading (cached)
	@st.cache_resource
	def load_full_engine():
	    """Build the AnalyzerEngine; the st.cache_resource decorator caches it."""
	    analyzer = AnalyzerEngine()
	    return analyzer


	engine = load_full_engine()

	# 3. Session-state initialisation: make sure the results slot exists.
	if "res" not in st.session_state:
	    st.session_state["res"] = None

	def prepare_export_data(res, fig_res=None):
	    """Flatten the analysis results into a long-format DataFrame for export.

	    Args:
	        res: Result dictionary. The sub-dictionaries "basic", "lexical",
	            "syntax", "cohesion", "consecutio" and "combined" are read when
	            present; only int/float/str values are exported.
	        fig_res: Optional dictionary with the figurative metrics "mds_s",
	            "mds_w" and "total". Missing values are exported as 0.

	    Returns:
	        A DataFrame with the columns "Categoria", "Metrica" and "Valore".
	    """
	    # Section label -> key of the matching sub-dictionary in ``res``.
	    sections = (
	        ("Statistiche Base", "basic"),
	        ("Lessico", "lexical"),
	        ("Sintassi e Verbi", "syntax"),
	        ("Coesione", "cohesion"),
	        ("Consecutio", "consecutio"),
	        ("Combined", "combined"),
	    )
	    # Bulky entries (sentence texts, CoNLL dump, doc) are never exported.
	    skipped_keys = ("texts", "conll", "doc")

	    records = []
	    for label, section_key in sections:
	        records.extend(
	            {"Categoria": label, "Metrica": metric, "Valore": value}
	            for metric, value in res.get(section_key, {}).items()
	            if isinstance(value, (int, float, str)) and metric not in skipped_keys
	        )

	    # Connectors are a nested dictionary inside "cohesion": one row per type.
	    connector_values = res.get("cohesion", {}).get("connectors", {})
	    records.extend(
	        {"Categoria": "Connettori", "Metrica": f"Conn_{kind}", "Valore": amount}
	        for kind, amount in connector_values.items()
	    )

	    # Figurative (BERT) metrics are always emitted, defaulting to 0.
	    figurative = fig_res if fig_res else {}
	    records.extend(
	        {"Categoria": "Figuratività (BERT)", "Metrica": name, "Valore": figurative.get(name, 0)}
	        for name in ("mds_s", "mds_w", "total")
	    )

	    return pd.DataFrame(records)


	def clear_all():
	    """Reset the stored analysis results and empty the input text."""
	    for state_key, blank_value in (("res", None), ("input_text", "")):
	        st.session_state[state_key] = blank_value

	st.title("📝 Co\u00B3 Suite")
	st.caption("Hybrid Text Analysis Dashboard with parameters for Coherence, Cohesion and Complexity evaluation powered by a custom-built engine and BERT.")

	lang_choice = st.radio("Language:", ["English", "Italiano"], horizontal=True)
	st.session_state.lang = "it" if lang_choice == "Italiano" else "en"


	# 4. Input area
	uploaded_file = st.file_uploader("Upload a text file (.txt)", type=["txt"])
	if uploaded_file is not None:
	    # "utf-8-sig" drops a leading BOM and errors="replace" keeps a file that
	    # is not valid UTF-8 from crashing the app with UnicodeDecodeError.
	    stringio = uploaded_file.getvalue().decode("utf-8-sig", errors="replace")
	    # Mirror the (truncated) content into session state so the text area,
	    # which is bound to the same key, displays it.
	    st.session_state.input_text = stringio[:30000]
	    st.session_state.source_name = os.path.splitext(uploaded_file.name)[0]
	elif st.session_state.get('input_text', "").strip():
	    # No file: fall back to whatever was typed in the text area.
	    st.session_state.source_name = "CustomText"

	txt = st.text_area("Insert Text (max 30,000 characters):",
	                   height=200,
	                   max_chars=30000,
	                   key="input_text")
	# Untruncated text, used only to report the original length to the user.
	input_final = st.session_state.input_text if uploaded_file is None else stringio

	col_btn1, col_btn2, _ = st.columns([1, 1, 4])
	with col_btn1:
	    if st.button("Start Analysis", type="primary"):
	        if len(input_final) > 30000:
	            # The analysis below still runs, on the text truncated above.
	            st.error(f"The text is too long ({len(input_final)} characters). The maximum limit is 30,000.")
	        if st.session_state.input_text.strip():
	            # Drop results derived from a previously analysed text so they
	            # cannot be shown or exported alongside the new one.
	            st.session_state.pop("tab6_results", None)
	            st.session_state.pop("fig_results_data", None)
	            with st.spinner("Analyzing..."):
	                full_res = engine.run(st.session_state.input_text, lang=st.session_state.lang)
	                # Store the textual results.
	                st.session_state.res = full_res
	                # Store the DOC object separately for the BERT tab.
	                st.session_state.nlp_doc = full_res["doc"]
	        else:
	            st.warning("Please insert some text.")
	with col_btn2:
	    st.button("Clean Fields", on_click=clear_all)

	# 5. Display of the 34 outputs
	# NOTE: the `with tN:` sections further down use the names bound here, so
	# they only make sense while this guard is true.
	if st.session_state.res:
	    r = st.session_state.res
	    b = r["basic"]
	    l = r["lexical"]
	    s = r["syntax"]
	    c = r["cohesion"]
	    d = r['consecutio']

	    tab_labels = [
	        "📊 Basical Statistics",
	        "📚 Lexical features ",
	        "🏗️ Sintax & Verbs",
	        "🧠 Cohesion & Connectives",
	        "🎭 Figurative Language",
	        "📈 Combined Metrics",
	        "📖 ILA Index",
	    ]
	    t1, t2, t3, t4, t5, t6, t7 = st.tabs(tab_labels)

	# --- TAB 1: BASIC STATISTICS & RICHNESS ---
	# Renders the headline counts, the lexical-range metrics, the PMI n-gram
	# tables and the per-chunk frequency / TF-IDF trends. Uses `b`, `l`, `t1`
	# and `engine`, which are bound earlier in the script.
	with t1:
	    st.subheader("Quantitative Basics")
	    c1, c2, c3, c4 = st.columns(4)
	    c1.metric("Total Tokens", b["tokens"])
	    c2.metric("Total Sentences", b["sentences"])
	    c3.metric("Total Paragraphs", b["paragraphs"])
	    c4.metric("Hapax Legomena", l.get("hapax", 0), help="Number of words that appear only once in the text.")
	    c1.metric("Hapax Ratio", l.get("hapax_ratio", 0))
	    c2.metric("Type-Token Ratio (TTR)", l.get("TTR", 0), help="Ratio of unique words (types) to total words (tokens).")
	    c3.metric("Gunning Fog Index", l.get("gunning_fog", 0), help="An estimate of the years of formal education needed to understand the text on a first reading. This metric is based on the average sentence length and the percentage of complex words (words with three or more syllables).")
	    c4.metric("HD-D Index", l.get("HD-D", 0), help="A measure of lexical diversity that accounts for the frequency of word usage. It is calculated as the ratio of the number of unique words (types) to the total number of words (tokens) raised to the power of 0.5. A higher HD-D index indicates greater lexical diversity, while a lower index suggests more repetition in word usage.")

	    st.divider()
	    st.subheader("Lexical Range Metrics and Concreteness")
	    col_r1, col_r2, col_r3, col_r4 = st.columns(4)
	    col_r1.metric("Lexical Range 1", l.get("r1", 0),help="Lexical Range 1: Percentage of words that are among the 1000 most common words in the language according to the dictionaries of the Nation’s Range program. A lower percentage indicates a wider lexical range, while a higher percentage suggests a more limited vocabulary.")
	    col_r2.metric("Lexical Range 2", l.get("r2", 0), help="Lexical Range 2: Percentage of words that are among the 2000 most common words in the language according to the dictionaries of the Nation’s Range program. ")
	    col_r3.metric("Lexical Range 3", l.get("r3", 0), help="Lexical Range 3: Percentage of words percentage of words belonging to \"The Academic Word List\"")
	    col_r4.metric("Concreteness (MRC)", round(l.get("concreteness", 0), 2), help="Average concreteness rating of the words in the text based on the MRC Psycholinguistic Database. Concreteness ratings range from 100 (very abstract) to 700 (very concrete). A higher average concreteness score indicates that the text contains more concrete and tangible words, while a lower score suggests a more abstract vocabulary.")

	    st.divider()
	    st.subheader("🔗 Bigrams & Trigrams (PMI)")

	    # Everything below needs the saved doc, so it all lives under this guard
	    # (the trend section reads `current_doc` as well).
	    if "nlp_doc" in st.session_state:
	        current_doc = st.session_state.nlp_doc
	        all_lemmas = [t.lemma_.lower() for t in current_doc if not t.is_punct and not t.is_space]

	        c_bi, c_tri = st.columns(2)
	        with c_bi:
	            df_bi = engine.freq_mod.get_pmi(all_lemmas, n=2)
	            st.caption("Top Bigrams")
	            st.dataframe(df_bi, use_container_width=True, hide_index=True)
	            if not df_bi.empty:
	                st.download_button("📥 Download Bigrams", df_bi.to_csv(index=False).encode('utf-8'), "bigrams.csv", "text/csv", key="dl_bi")
	        with c_tri:
	            df_tri = engine.freq_mod.get_pmi(all_lemmas, n=3)
	            st.caption("Top Trigrams")
	            st.dataframe(df_tri, use_container_width=True, hide_index=True)
	            if not df_tri.empty:
	                st.download_button("📥 Download Trigrams", df_tri.to_csv(index=False).encode('utf-8'), "trigrams.csv", "text/csv", key="dl_tri")

	        st.divider()
	        st.subheader("📈 Frequencies & TF-IDF Trends")

	        # UI controls for splitting the text into chunks.
	        col_split1, col_split2 = st.columns([1, 3])
	        split_mode = col_split1.radio("Split Method:", ["numeric", "regex"])
	        if split_mode == "numeric":
	            split_val = col_split2.number_input("Number of parts:", min_value=2, max_value=20, value=5)
	        else:
	            split_val = col_split2.text_input("Regex pattern (e.g. \\n\\n for paragraphs):", value="\n\n")

	        # Trends are computed on the fly at every rerun.
	        chunks = engine.freq_mod.chunk_doc(st.session_state.input_text, current_doc, mode=split_mode, val=split_val)
	        df_freq, df_tfidf = engine.freq_mod.get_trends(chunks)

	        if not df_freq.empty:
	            # Pre-select the five words with the highest total count.
	            top_overall = df_freq.groupby("Word")["Count"].sum().nlargest(5).index.tolist()

	            selected_words = st.multiselect("Select words to plot:",
	                                            options=df_freq["Word"].unique(),
	                                            default=top_overall)

	            if selected_words:
	                plot_data_freq = df_freq[df_freq["Word"].isin(selected_words)]
	                plot_data_tfidf = df_tfidf[df_tfidf["Word"].isin(selected_words)]

	                # Absolute-frequency chart.
	                fig_f = go.Figure()
	                for word in selected_words:
	                    w_data = plot_data_freq[plot_data_freq["Word"] == word]
	                    fig_f.add_trace(go.Scatter(x=w_data["Part"], y=w_data["Count"], mode='lines+markers', name=word))
	                fig_f.update_layout(title="Absolute Frequencies per Chunk", xaxis_title="Chunk", yaxis_title="Count")

	                # TF-IDF chart.
	                fig_t = go.Figure()
	                for word in selected_words:
	                    w_data = plot_data_tfidf[plot_data_tfidf["Word"] == word]
	                    fig_t.add_trace(go.Scatter(x=w_data["Part"], y=w_data["TF-IDF"], mode='lines+markers', name=word, line=dict(dash='dot')))
	                fig_t.update_layout(title="TF-IDF per Chunk", xaxis_title="Chunk", yaxis_title="TF-IDF Score")

	                st.plotly_chart(fig_f, use_container_width=True)
	                st.plotly_chart(fig_t, use_container_width=True)
	                # Download buttons for the complete (unfiltered) tables.
	                st.divider()
	                col_dl1, col_dl2 = st.columns(2)
	                col_dl1.download_button(
	                    "📥 Download Complete Frequencies",
	                    df_freq.to_csv(index=False).encode('utf-8'),
	                    # Fixed: the file name used to be misspelled "frequecies.csv".
	                    "frequencies.csv", "text/csv", key="dl_freq"
	                )
	                col_dl2.download_button(
	                    "📥 Scarica TF-IDF Completo",
	                    df_tfidf.to_csv(index=False).encode('utf-8'),
	                    "tfidf.csv", "text/csv", key="dl_tfidf"
	                )
	            else:
	                st.info("Select at least one word to display the charts.")

	# --- TAB 2: LEXICON & DEIXIS ---
	# Each table row is (column index, label, source dict, key); rows are
	# rendered in order, so metrics sharing a column stack top to bottom.
	with t2:
	    st.subheader("Pronouns and Nouns")
	    noun_cols = st.columns(4)
	    for col_idx, label, source, key in (
	        (0, "Pronouns", l, "pronouns"),
	        (1, "Nouns", l, "nouns"),
	        (2, "Ratio Pronouns/Nouns", l, "pron_noun_ratio"),
	        (3, "First Person Pronouns", l, "first_person_ratio"),
	    ):
	        noun_cols[col_idx].metric(label, source.get(key, 0))
	    noun_cols[0].metric("Modifiers per Noun", s.get("mod_per_noun", 0),help="Average number of modifiers (adjectives, relative clauses, etc.) per noun in the text. A higher value indicates a more descriptive and detailed use of nouns, while a lower value suggests a simpler noun usage.")

	    st.divider()
	    st.subheader("Adjectives and Emphatic particles")
	    adj_cols = st.columns(4)
	    for col_idx, label, source, key in (
	        (0, "Deictics", l, "deictics"),
	        (1, "Adjectives", s, "adj_count"),
	        (2, "Adj per Sentence", s, "adj_x_sent"),
	        (3, "Emphatic Particles", l, "emphatic_particles"),
	        (0, "Deictic/Articles", l, "deictic_Frequency"),
	        (1, "attributive/Adj Ratio", s, "attr_adjs_ratio"),
	        (2, "attributive Adj Frequency", s, "attr_adjs_freq"),
	    ):
	        adj_cols[col_idx].metric(label, source.get(key, 0))

	    st.divider()
	    st.subheader("Articles")
	    art_cols = st.columns(4)
	    for col_idx, label, key in (
	        (0, "Articles", "articles"),
	        (1, "Definite Articles", "definite_articles"),
	        (2, "Demonstratives articles", "demonstratives"),
	        (2, "Demonstratives per sentence", "demonstratives_ratio"),
	        (1, "Definite articles per sentence", "definite_articles_ratio"),
	    ):
	        art_cols[col_idx].metric(label, l.get(key, 0))

	# --- TAB 3: SYNTAX & VERBS ---
	# Uses `s` (syntax), `d` (consecutio) and `b` (basic) bound earlier.
	with t3:
	    st.subheader("Syntactic Measures")
	    s1, s2, s3, s4 = st.columns(4)
	    s1.metric("Average Sentence Length", s.get("avg_sent_len", 0))
	    s2.metric("Subordinate per Sentence", s.get("sub_ratio", 0))
	    s3.metric("Relatives per Sentence", s.get("rel_clauses_per_sent", 0))
	    s4.metric("Distance from Root", s.get("root_dist", 0),help="Average distance from the root of the dependency tree to the other nodes. A higher value indicates a more complex syntactic structure, while a lower value suggests a simpler structure.")
	    s1.metric("Hypotactic depth", d.get("avg_depth", 0),help="Average depth of the dependency tree.")
	    # The fallback must be numeric: round({}, 4) raises TypeError.
	    s2.metric("sentence depth variance", round(d.get("sentence_depths", 0), 4), help="Variance of the depths of the dependency trees across sentences. A higher variance indicates greater variability in sentence complexity, while a lower variance suggests more uniformity in sentence structure.")
	    s1.metric("Punctuation Pairs per Sentence", s.get("punct_pairs_per_sent", 0),help="Number of punctuation pairs (e.g., parentheses, quotes) per sentence.")
	    s2.metric("Subj-Verb_Obj Inversions per sentence", s.get("svo_inversions_per_sent", 0),help="Number of subj-verb-obj inversions per sentence.")
	    s3.metric("Number of Subordinate Clauses (completive excluded)", s.get("non_comp_sub_per_sent", 0),help="Number of non-completive subordinate clauses per sentence.")
	    st.divider()

	    st.subheader("Verbs & Tenses")
	    v1, v2, v3, v4 = st.columns(4)
	    v1.metric("Ratio Present/Verbs", s.get("present_ratio", 0))
	    v2.metric("Ratio Past/Verbs", s.get("past_ratio", 0))
	    v3.metric("Ratio Participles/Verbs", s.get("participle_ratio", 0))
	    v4.metric("Consecutio Index", d.get("consecutio_index", 0),help="Index measuring the sequential relationship between clauses.")
	    v1.metric("Temporal Stability", f"{d.get('tense_stability', 0)}", help="Percentage of verbs that maintain the same tense across the text. A higher percentage indicates greater temporal stability, while a lower percentage suggests more frequent tense shifts.")
	    v2.metric("Verbal Density", f"{d.get('verb_density', 0)}",help="Percentage of words that are verbs in the text. A higher percentage indicates a more verb-heavy text, while a lower percentage suggests a less verb-heavy text.")

	    st.divider()
	    st.subheader("Dependency Tree Inspection (CoNLL-U)")

	    # Sentence selector for the CoNLL-U view.
	    idx = st.selectbox(
	        "Select a sentence to inspect:",
	        range(len(b["texts"])),
	        format_func=lambda i: f"Frase {i+1}: {b['texts'][i][:70]}..."
	    )

	    # With no sentences the selectbox has no selection (None); indexing the
	    # CoNLL list with it would raise, so only render when one is chosen.
	    if idx is not None:
	        st.text_area(
	            label="CoNLL-U Format (Tab-Separated):",
	            value=b["conll"][idx],
	            height=300
	        )

	# --- TAB 4: COHESION & CONNECTIVES ---
	# Uses `c` (cohesion results) bound earlier in the script.
	with t4:
	    st.subheader("Textual Cohesion Metrics")
	    # (label, key in `c`, help text), one entry per column, left to right.
	    overlap_metrics = (
	        ("Lemma Overlap (adjacent)", "lexical_cohesion_local", "Percentage of lemmas that are shared between adjacent paragraphs. A higher percentage indicates stronger local cohesion, while a lower percentage suggests weaker local cohesion."),
	        ("Lemma Overlap (3 paragraphs)", "lexical_cohesion_global", "Percentage of lemmas that are shared between sentences in different paragraphs. "),
	        ("Semantic Overlap (Sentences)", "semantic_cohesion_sentences", "Percentage of semantic relationships that are shared between adjacent sentences. This metric is calculated by using BERT to identify similarities in meaning between sentences. "),
	        ("Semantic Overlap (Paragraphs)", "semantic_cohesion_paragraphs", "Percentage of semantic relationships that are shared between sentences in different paragraphs. "),
	    )
	    for column, (label, key, help_text) in zip(st.columns(4), overlap_metrics):
	        column.metric(label, f"{c.get(key, 0)*100:.2f}%", help=help_text)

	    st.divider()
	    st.subheader("Connector Frequency (Normalized)")

	    # The eight connector types, split over two bar charts.
	    conn_data = c.get("connectors", {})
	    left_bars = (
	        ("Adictives +", "AdPos"),
	        ("Adictives -", "AdNeg"),
	        ("Causals +", "CausPos"),
	        ("Causals -", "CausNeg"),
	    )
	    right_bars = (
	        ("Temporals +", "TempPos"),
	        ("Temporals -", "TempNeg"),
	        ("Logics +", "LogPos"),
	        ("Logics -", "LogNeg"),
	    )
	    for column, bars in zip(st.columns(2), (left_bars, right_bars)):
	        with column:
	            st.bar_chart(pd.Series({name: conn_data.get(code, 0) for name, code in bars}))

	    st.info(f"Cohesion Value\n\n{c.get('general_cohesion', 0)}\n\nThe logarithm of the standard deviation, using the weighted sum of frequencies as the base.")

	# Tab 5: FIGURATIVE LANGUAGE ANALYSIS
	# Runs the metaphor detector on demand and renders the stored results.
	with t5:
	    st.header("🎭 Figurative Language Analysis (BERT V5)", help="The analysis may take some time to complete. Large texts may require several minutes.")

	    if "nlp_doc" not in st.session_state:
	        st.warning("Before proceeding, run the general analysis in Tab 1.")
	    else:
	        # Three columns: launch button, sampling checkbox, sample-size slider.
	        col_btn, col_chk, col_sld = st.columns([1.5, 1, 2], gap="medium")

	        with col_chk:
	            use_sampling = st.checkbox("Sample mode", value=True, help="Analyze only a random portion of the text.")

	        with col_sld:
	            sample_rate = 1.0
	            if use_sampling:
	                sample_rate = st.slider("Sample Size %", 5, 95, 10, 5) / 100

	        with col_btn:
	            launch = st.button("Launch Metaphor Detector", type="secondary", use_container_width=True)

	        if launch:
	            with st.spinner("BERT is executing the Masked Language Modeling..."):
	                lang = st.session_state.get("lang", "en")
	                res = engine.fig_mod.analyze(st.session_state.nlp_doc,
	                                             sample_rate=sample_rate,
	                                             lang=lang)
	                st.session_state.fig_results_data = res
	                # Remember the rate these results were computed with, so the
	                # caption below does not follow later slider movements.
	                st.session_state.fig_sample_rate = sample_rate

	        if "fig_results_data" in st.session_state:
	            res = st.session_state.fig_results_data
	            # Rendered once: the subheader, the columns and the MDS-S metric
	            # used to be emitted twice by a duplicated snippet.
	            st.subheader("MDS indices (Metaphor Density Score)")
	            c1, c2, c3 = st.columns(3)
	            c1.metric("MDS-S (per Sentence)", round(res["mds_s"], 4),help="Metaphor Density Score per Sentence: This metric calculates the average number of metaphors per sentence in the text. Metaphor detection scans syntactic pairs (Subj-Verb, Obj-Verb, Noun-Adj) by masking terms and using BERT to predict contextual expectations; it flags a metaphor when the semantic similarity between the original word (neutralized via person/thing placeholders) and BERT's top candidates falls below a 0.90 threshold.")
	            c2.metric("MDS-W (per 1k Words)", round(res["mds_w"], 2),help="This metric calculates the average number of metaphors per 1,000 words in the text.")
	            c3.metric("Total Metaphors", res["total"],help="Total number of metaphors detected in the text.")
	            if res.get("is_sample"):
	                shown_rate = st.session_state.get("fig_sample_rate", sample_rate)
	                st.caption(f"⚠️ Note: These scores are estimated based on a {shown_rate*100:.0f}% random sample of the text.")
	            st.divider()
	            st.subheader("🔍 Retrieved semantic anomalies")

	            if not res["detections"]:
	                st.info("No semantic anomalies detected with the current thresholds.")
	            else:
	                # `det`, not `d`: `d` holds the consecutio results used by Tab 3.
	                for det in res["detections"]:
	                    # Label colour depends on the detection probability.
	                    color = "red" if det['probability'] > 85 else "orange" if det['probability'] > 70 else "blue"
	                    with st.expander(f":{color}[{det['term']} ↔ {det['head']}] - Probabilità: {det['probability']}%"):
	                        st.write(f"Context: _{det['sentence']}_")
	                        # "\\|" yields the same backslash-pipe text without
	                        # relying on an invalid escape sequence.
	                        st.caption(f"Logic: {det['reason']} \\| s1: {det['s1']} \\| s2: {det['s2']}")

	# --- TAB 6: COMBINED METRICS ---
	# Runs the combined analysis on demand and shows the stored quality score,
	# predicted class and clustering figure.
	with t6:
	    st.subheader("📈 Combined Metrics & Positioning")
	    if st.session_state.get("lang", "en") == "it":
	        st.warning(
	            "⚠️ Combined Metrics not available for Italian Language.\n\n"
	            "The reference database is in English. "
	            "The Italian module is under developement."
	        )
	    elif st.session_state.res:
	        fig_results = st.session_state.get("fig_results_data", None)
	        if not fig_results:
	            st.info("💡 Caution: the figurative metrics (BERT) have not been calculated. The model will automatically exclude them from comparison with historical texts.")

	        # Launch button.
	        if st.button("Calculate Positioning and Quality Score", type="primary"):
	            with st.spinner("Clustering and Correlation Calculation in Progress..."):
	                try:
	                    source_name = st.session_state.get("source_name", "CustomText")

	                    # The engine call returns three values.
	                    q_score, fig_clustering, classe_assegnata = engine.run_combined_analysis(
	                        res=st.session_state.res,
	                        fig_res=fig_results,
	                        source_name=source_name,
	                    )
	                    # Keep everything in session_state so it survives reruns.
	                    st.session_state.tab6_results = (q_score, fig_clustering, classe_assegnata)
	                except FileNotFoundError:
	                    st.warning("Historical database ('database_completo_largo.pkl') not found.")
	                except Exception as e:
	                    st.error(f"Error occurred during combined analysis: {e}")

	        # Show the stored results.
	        if "tab6_results" in st.session_state:
	            q_score, fig_clustering, classe_assegnata = st.session_state.tab6_results

	            col_score, col_desc = st.columns([1, 2])

	            with col_score:
	                st.metric("Quality Score", f"{q_score:.3f}")

	            with col_desc:
	                # Contiguous bands: [3, inf) / [2, 3) / [0.5, 2) / (-inf, 0.5).
	                # The "Fair" band used to start at -15, which left "Poor"
	                # unreachable for every score between -15 and 0.5.
	                if q_score >= 3:
	                    st.success("🌟 Excellent!")
	                elif q_score >= 2:
	                    st.info("📊 Good: Balanced style, tending towards quality works.")
	                elif q_score >= 0.5:
	                    st.warning("⚠️ Fair: Common traits with consumer or debut literature.")
	                elif q_score < 0.5:
	                    st.error("📉 Poor: Stylistically close to amateur or basic works.")

	            # NOTE(review): rendered at full width; confirm against the
	            # original layout whether it belongs inside `col_desc`.
	            with st.expander("ℹ️ How is this score interpreted?"):
	                st.markdown("""
	The score is calculated by measuring the stylistic correlation (Pearson) of your text against 4 predefined classes of works, penalizing similarity to amateur texts.

	* theoric range: from 0 to 4
	* Towards 3.0: Maximum affinity with Great Classics/Masterpieces and Great Bestsellers.
	* Around 2.0: Neutral or hybrid style.
	* Towards 0.0: Maximum affinity with Amateur Works and Basic Genre Literature.
	""")
	            # The predicted class goes in an info box.
	            st.info(f"🏷️ Predicted Class:\n\n{classe_assegnata}")
	            st.divider()
	            st.plotly_chart(fig_clustering, use_container_width=True)
	    else:
	        st.info("Perform the basic analysis in Tab 1 first.")


	# --- TAB 7: ILA INDEX (readability) ---
	# Computes the ILA score from the lexical ranges in `l` and the syntactic
	# counts in `s`, then plots it next to hard-coded reference works.
	with t7:
	    st.subheader("📖 Index of Automatic Readability (ILA)")

	    # 1. Collect the inputs.
	    lr1 = l.get("r1", 0)
	    lr2 = l.get("r2", 0)
	    lr3 = l.get("r3", 0)

	    punct = s.get("punct_pairs_per_sent", 0)
	    svo = s.get("svo_inversions_per_sent", 0)
	    non_comp = s.get("non_comp_sub_per_sent", 0)

	    # 2. Lexical component: log of the LR1-weighted share. The fallback has
	    # to sit outside math.log: math.log(0.0) raises ValueError, which is what
	    # happened whenever all three lexical ranges were 0.
	    somma_lr = lr1 + lr2 + lr3
	    comp_lessicale = 0.0
	    if somma_lr > 0:
	        lexical_ratio = ((lr1 * 2) + lr2 + lr3) / somma_lr
	        if lexical_ratio > 0:
	            comp_lessicale = math.log(lexical_ratio)

	    # 3. Syntactic penalty.
	    comp_sintattica = (punct + svo + non_comp)
	    # 4. Final ILA score.
	    ila_score = comp_lessicale - comp_sintattica

	    # 5. Headline metrics.
	    col_il1, col_il2, col_il3 = st.columns(3)
	    col_il1.metric("Lexical Readability", f"{comp_lessicale:.4f}", help="(Log((LR1 * 2) + LR2 + LR3) / Somma LR)")
	    col_il2.metric("Sytactic Complexity", f"- {comp_sintattica:.4f}", help="Sum of: Punctuation Pairs per Sentence + SVO Inversions per Sentence + Non-completive Subordinate Clauses per Sentence")
	    col_il3.metric("Final ILA Score", f"{ila_score:.4f}")

	    st.info("💡 How to Interpret the ILA Index: A higher value indicates greater readability and fluency. The score rewards the use of high-frequency vocabulary (LR1) and penalizes syntactic complexity (inversions, parentheses, and complex subordinate clauses).")

	    st.divider()

	    # --- 6. ILA COMPARISON CHART ---
	    st.subheader("📊 Comparison of ILA with Reference Works")

	    # Hard-coded reference values; the last entry is the analysed text.
	    books = ["Ulysses (Part III)<br>(Hard)", "The Hobbit<br>(Med. Hard)", "The Little Prince<br>(Med. Easy)", "Isodora Moon goes to School<br>(Easy)", "📍 Your Text"]
	    lex_vals = [0.63, 0.64, 0.65, 0.64, comp_lessicale]
	    syn_vals = [-2.73, -1.29, -0.87, -0.53, -comp_sintattica]  # negative so the bars hang below zero
	    ila_vals = [-2.1, -0.65, -0.23, 0.11, ila_score]

	    fig_ila = go.Figure()

	    # Bars for the two components.
	    fig_ila.add_trace(go.Bar(name='Lexical Readability (+)', x=books, y=lex_vals, marker_color='#00cc96'))
	    fig_ila.add_trace(go.Bar(name='Sytactic Complexity (-)', x=books, y=syn_vals, marker_color='#ff4b4b'))

	    # Line for the final score.
	    fig_ila.add_trace(go.Scatter(
	        name='ILA Score',
	        x=books, y=ila_vals,
	        mode='lines+markers+text',
	        text=[f"{v:.2f}" for v in ila_vals],
	        textposition="top center",
	        textfont=dict(color="black", size=12),
	        marker=dict(color='white', size=10, line=dict(color='black', width=2)),
	        line=dict(color='black', width=2)
	    ))

	    # Chart layout.
	    fig_ila.update_layout(
	        barmode='relative',  # stacks positives above and negatives below zero
	        height=400,
	        margin=dict(l=20, r=20, t=40, b=20),
	        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
	    )

	    st.plotly_chart(fig_ila, use_container_width=True)

	    st.divider()

	# --- OTHER INDICES (gauges) ---
	def _make_gauge(value, title_html, axis_max, bands):
	    """Build a gauge+number indicator figure.

	    Args:
	        value: Number shown by the gauge.
	        title_html: Title text (may contain HTML markup).
	        axis_max: Upper bound of the axis; the lower bound is 0.
	        bands: Iterable of (start, end, colour) tuples for the coloured steps.
	    """
	    return go.Figure(go.Indicator(
	        mode="gauge+number",
	        value=value,
	        title={'text': title_html},
	        gauge={
	            'axis': {'range': [0, axis_max]},
	            'bar': {'color': "rgba(0,0,0,0.5)"},
	            'steps': [{'range': [start, end], 'color': colour} for start, end, colour in bands],
	        },
	    ))


	# Re-enter the Tab 7 container to append the gauge section to it.
	with t7:
	    st.subheader("📊 Comparison of Standard Indices")

	    # HD-D and Gunning Fog are read from the lexical results as they are.
	    hdd = l.get("HD-D", 0)
	    gunning_fog = l.get("gunning_fog", 0)

	    # Gulpease: letters are approximated as characters minus tokens.
	    tokens_count = b.get("tokens", 1)
	    sents_count = b.get("sentences", 1)
	    chars_count = b.get("chars", 1)
	    letters_count = chars_count - tokens_count

	    if tokens_count > 0:
	        gulpease = 89 + ((300 * sents_count) - (10 * letters_count)) / tokens_count
	    else:
	        gulpease = 0
	    gulpease = max(0, min(100, gulpease))  # clamp to the 0-100 range

	    red, orange, green = "#ff4b4b", "#ffa500", "#00cc96"

	    # 1. GULPEASE (higher = easier)
	    fig_gulp = _make_gauge(
	        gulpease,
	        "Gulpease<br><span style='font-size:0.8em;color:gray'>Higher = Easier</span>",
	        100,
	        ((0, 40, red), (40, 60, orange), (60, 100, green)),
	    )
	    # 2. GUNNING FOG (higher = harder)
	    fig_fog = _make_gauge(
	        gunning_fog,
	        "Gunning Fog<br><span style='font-size:0.8em;color:gray'>Higher = Harder</span>",
	        25,
	        ((0, 9, green), (9, 14, orange), (14, 25, red)),
	    )
	    # 3. HD-D (higher = richer vocabulary)
	    fig_hdd = _make_gauge(
	        hdd,
	        "Diversity (HD-D)<br><span style='font-size:0.8em;color:gray'>Higher = More Diverse</span>",
	        50,
	        ((0, 30, red), (30, 40, orange), (40, 50, green)),
	    )

	    # Tighten the margins, then show the three gauges side by side.
	    gauges = (fig_gulp, fig_fog, fig_hdd)
	    for fig in gauges:
	        fig.update_layout(height=260, margin=dict(l=20, r=20, t=90, b=20))
	    for gauge_col, fig in zip(st.columns(3), gauges):
	        with gauge_col:
	            st.plotly_chart(fig, use_container_width=True)


	st.divider()
	# CSV export of every scalar metric, offered only when results exist.
	if st.session_state.res:
	    # BERT results are optional; prepare_export_data falls back to zeros.
	    fig_results = st.session_state.get("fig_results_data", None)

	    df_export = prepare_export_data(st.session_state.res, fig_results)

	    base_name = st.session_state.get("source_name", "CustomText")
	    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
	    csv_name = f"Co3_{base_name}_{timestamp}.csv"
	    csv_data = df_export.to_csv(index=False).encode('utf-8')

	    st.download_button(
	        label="📥 Export Results in CSV",
	        data=csv_data,
	        file_name=csv_name,
	        mime='text/csv'
	    )

	# Footer with the visit counter. The literal needs the f prefix, otherwise
	# the text "{visite}" is printed verbatim; "\\|" keeps the backslash-pipe
	# without relying on an invalid escape sequence.
	visite = st.session_state.get("total_visits", 0)
	st.caption(f"Co\u00B3 Suite \\| {visite}")