Spaces:

kambris
/

V4

Sleeping

App Files Files Community

V4 / app.py

kambris

Create app.py

2187547 verified 30 days ago

raw

history blame contribute delete

33.7 kB

	"""
	Voynich Manuscript Linguistic Analyzer
	Gradio app for statistical analysis of EVA-transliterated Voynich text,
	compared against Latin, Arabic, Hebrew, Medieval Welsh, and Georgian corpora.
	"""

	import io
	import math
	import re
	import os
	import collections
	import tempfile
	import atexit
	from typing import Optional

	import gradio as gr
	import numpy as np
	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt
	from PIL import Image as PILImage

	# ─────────────────────────────────────────────
	# SAMPLE VOYNICH EVA TEXT (small illustrative corpus)
	# Built-in fallback text: run_analysis uses it when no file is uploaded.
	# NOTE(review): the first ten lines of the sample occur twice verbatim
	# (the block starting "fachys ykal ..." is repeated), which raises repeat
	# counts in every frequency-based metric — confirm this is intentional.
	# ─────────────────────────────────────────────
	SAMPLE_EVA = """
	fachys ykal ar ataiin shol shory cth res y kor sholdy
	qokaiin qokey qokaiin ytaiin yteor qokain qokal y tain ytaiin
	daiin okaiin chedy qokeedy dain shedy daiin ol cheds daiin
	okeedy chedal okeedy dar ar aiin daiin daiin oteedy keedy chal
	chol dy kaiin dar shey qodar ytedy cheds chol dain kaiin
	oteedy sheedy chedal dar ytedy okaiin chedy kaiin shedy chol
	qokeol dar yteedy cheds dar chol qokeedy okeedy chedal dar
	daiin yteedy qokain chedy dar chol daiin okaiin chedy kaiin
	shedy chol oteedy chedal qodar yteol dar daiin okaiin chedy
	qokeol chedal dar chol daiin oteedy chedal dar yteedy okeedy
	fachys ykal ar ataiin shol shory cth res y kor sholdy
	qokaiin qokey qokaiin ytaiin yteor qokain qokal y tain ytaiin
	daiin okaiin chedy qokeedy dain shedy daiin ol cheds daiin
	okeedy chedal okeedy dar ar aiin daiin daiin oteedy keedy chal
	chol dy kaiin dar shey qodar ytedy cheds chol dain kaiin
	oteedy sheedy chedal dar ytedy okaiin chedy kaiin shedy chol
	qokeol dar yteedy cheds dar chol qokeedy okeedy chedal dar
	daiin yteedy qokain chedy dar chol daiin okaiin chedy kaiin
	shedy chol oteedy chedal qodar yteol dar daiin okaiin chedy
	qokeol chedal dar chol daiin oteedy chedal dar yteedy okeedy
	daiin okaiin shedy chol daiin oteedy qokain chedal cheds chol
	shory chol daiin keedy okaiin chedal dar daiin shedy kaiin
	qokeedy chedal yteedy dar chol okeedy daiin chedal shedy dar
	ytaiin qokain daiin chedy qodar shedy okaiin cheds chol daiin
	"""

	# ─────────────────────────────────────────────
	# REFERENCE LANGUAGE CORPORA (romanized / transliterated samples)
	# These are small illustrative samples — real analysis needs larger corpora.
	# The dictionary keys double as the language names offered in the UI
	# checkbox group and as the labels printed on plots and tables.
	# NOTE(review): the Georgian sample mixes upper- and lower-case letters
	# (e.g. "gareSe", "erTi"), which looks like a transliteration where case
	# distinguishes letters. tokenize() and tokenize_words() lowercase their
	# input, so any such distinction is lost — confirm whether that matters.
	# ─────────────────────────────────────────────
	LANGUAGE_CORPORA = {
	"Latin": """
	arma virumque cano troiae qui primus ab oris italiam fato profugus laviniaque venit
	litora multum ille et terris iactatus et alto vi superum saevae memorem iunonis ob iram
	multa quoque et bello passus dum conderet urbem inferretque deos latio genus unde latinum
	albanique patres atque altae moenia romae musa mihi causas memora quo numine laeso
	quidve dolens regina deum tot volvere casus insignem pietate virum tot adire labores
	impulerit tantaene animis caelestibus irae urbs antiqua fuit tyrii tenuere coloni
	carthago italiam contra tiberinaque longe ostia dives opum studiisque asperrima belli
	quam iuno fertur terris magis omnibus unam posthabita coluisse samo hic illius arma
	hic currus fuit hoc regnum dea gentibus esse si qua fata sinant iam tum tenditque fovetque
	progeniem sed enim troiano a sanguine duci audierat tyrias olim quae verteret arces
	""",
	"Arabic": """
	bismi allahi alrrahmani alrrahimi alhamdu lillahi rabbi alealamina alrrahmani alrrahimi
	maliki yawmi alddini iyyaka nabudu waiyyaka nastainu ihdina alssirata almustaqima
	sirata alladhina anamta ealayhim ghayri almaghdubi ealayhim wala alddalina
	qul huwa allahu ahadun allahu alssamadu lam yalid walam yulad walam yakun lahu
	kufuan ahadun inna anzalnahu fi laylati alqadri wama adraka ma laylatu alqadri
	laylatu alqadri khayrun min alfi shahrin tanazzalu almalaaikatu waalrruhu fiha
	bidni rabbihim min kulli amrin salamun hiya hatta matlaei alfajri alam nashrah
	laka sadraka wawadaena anka wizraka allathee anqada zahraka warafaena laka
	dhikraka fainna maea aleusri yusran inna maea aleusri yusran faitha faraghta
	""",
	"Hebrew": """
	bereshit bara elohim et hashamayim veet haaretz vehaaretz hayta tohu vavohu
	vechoshech al pney tehom veruach elohim merachefet al pney hamayim vayomer
	elohim yehi or vayehi or vayar elohim et haor ki tov vayavdel elohim beyn haor
	uveyn hachoshech vayikra elohim laor yom velachoshech kara layla vayehi erev
	vayehi voker yom echad vayomer elohim yehi rakia betoch hamayim vihi mavdil
	beyn mayim lammayim vayaas elohim et harakia vayavdel beyn hamayim asher
	mitachat larakia uveyn hamayim asher meal larakia vayehi chen vayikra elohim
	larakia shamayim vayehi erev vayehi voker yom sheni vayomer elohim yikavu
	hamayim mitachat hashamayim el makom echad vetera hayabashah vayehi chen
	""",
	"Medieval Welsh": """
	yn y dechreuad y creodd duw y nefoedd ar ddaear yr oedd y ddaear yn adfeilion
	ac yn wag ac yr oedd tywyllwch ar wyneb y dyfnder ac ysbryd duw yn symud ar
	wyneb y dyfroedd a duw a ddywedodd bydded goleuni a bu goleuni a duw a welodd
	y goleuni ei fod yn dda a duw a wahanodd y goleuni oddi wrth y tywyllwch
	ac a alwodd duw y goleuni yn ddydd a galwodd y tywyllwch yn nos ac aeth
	yr hwyr ar bore dydd cyntaf a duw a ddywedodd bydded ffurfafen yng nghanol y dyfroedd
	a bydded yn gwahanu dyfroedd oddi wrth ddyfroedd a gwnaeth duw y ffurfafen
	ac a wahanodd rhwng y dyfroedd oedd tan y ffurfafen ar dyfroedd oedd uwch
	""",
	"Georgian": """
	tavisupali pirovneba arsebobs rasac unda iqos da rasac unda aketebs piradi
	tanxmobis gareSe arc erTi pirovneba verc aaketebs samarTliani sazogadoeba
	romelic TiToeul wevrze mzrunvelobas iCens Tavisuflebas da Tanasworobas
	uzrunvelyofs yvela moqalaqe kanonis winaSe Tanasworad aris arc erTi
	diskriminacia ar aris Semosvla pirovnebis uflebebSi samarTliani
	da Tavisufali sazogadoeba unda aRiardes Tavisufali arCevnis ufleba
	TiToeuli adamiani ibadeba TavisuflebiT da Tanasworad RirsebiT da
	uflebiT isini jildosulia gonebisa da sindisisa da urTierTobaSi
	erTmaneTis mimarT ZmobisduliT unda moiqcnen
	""",
	}

	# ─────────────────────────────────────────────
	# DEFAULT BIGRAPH SPLIT RULES (EVA-based)
	# One rule per line in the form "xy -> x y"; this text pre-fills the rules
	# textbox in the UI and is parsed by parse_bigraph_rules().
	# Every replacement here contains a space, so applying a rule inserts a
	# space between the two letters. tokenize() discards non-letters (spaces
	# included), whereas tokenize_words() treats a space as a word boundary.
	# ─────────────────────────────────────────────
	DEFAULT_BIGRAPH_RULES = """qo -> q o
	ch -> c h
	sh -> s h
	ee -> e e
	ai -> a i
	ol -> o l
	or -> o r
	ar -> a r
	al -> a l
	"""

	# ─────────────────────────────────────────────
	# CORE ANALYSIS FUNCTIONS
	# ─────────────────────────────────────────────

	def parse_bigraph_rules(rules_text: str) -> list[tuple[str, str]]:
	    """Parse bigraph rewrite rules written one per line as ``xy -> x y``.

	    Lines that are blank, that lack a ``->`` separator, or whose left-hand
	    side is empty are ignored.  Only the first ``->`` on a line separates
	    the two sides, so a replacement may itself contain ``->``.

	    Args:
	        rules_text: Raw multi-line rule text (``None`` is treated as empty).

	    Returns:
	        ``(pattern, replacement)`` pairs in the order they were written,
	        each side stripped of surrounding whitespace.
	    """
	    rules = []
	    for line in (rules_text or "").splitlines():
	        pattern, arrow, replacement = line.partition("->")
	        pattern = pattern.strip()
	        # An empty pattern is rejected: str.replace("", x) would insert the
	        # replacement between every single character of the text.
	        if not arrow or not pattern:
	            continue
	        rules.append((pattern, replacement.strip()))
	    return rules


	def apply_bigraph_splits(text: str, rules: list[tuple[str, str]]) -> str:
	    """Apply each ``(pattern, replacement)`` rule to *text*, in list order.

	    Rules are applied sequentially with ``str.replace``, so a later rule
	    sees the output of the earlier ones.

	    Args:
	        text: The text to rewrite.
	        rules: ``(pattern, replacement)`` pairs; pairs with an empty
	            pattern are skipped.

	    Returns:
	        The rewritten text (the input unchanged when *rules* is empty).
	    """
	    for bigraph, replacement in rules:
	        # Replacing the empty string would inject the replacement between
	        # every character, which is never what a split rule means.
	        if not bigraph:
	            continue
	        text = text.replace(bigraph, replacement)
	    return text


	def tokenize(text: str) -> list[str]:
	    """Return the alphabetic characters of *text*, lowercased, in order.

	    Everything that is not a letter (digits, spaces, punctuation) is
	    dropped, so the result is a flat list of single-character tokens.
	    """
	    lowered = text.lower()
	    return list(filter(str.isalpha, lowered))


	def tokenize_words(text: str) -> list[str]:
	    """Return the lowercased words of *text*.

	    A word is a maximal run of ASCII letters; any other character acts as
	    a separator.
	    """
	    lowered = text.lower()
	    # The pattern requires at least one letter, so no match is ever empty.
	    return re.findall(r"[a-zA-Z]+", lowered)


	def ioc(tokens: list[str]) -> float:
	    """Return the Index of Coincidence of *tokens*.

	    This is the probability that two tokens drawn without replacement are
	    equal: ``sum(f * (f - 1)) / (n * (n - 1))`` over the token frequencies
	    ``f``.  Sequences shorter than two tokens yield ``0.0``.
	    """
	    n = len(tokens)
	    if n < 2:
	        return 0.0
	    coincidences = 0
	    for count in collections.Counter(tokens).values():
	        coincidences += count * (count - 1)
	    return coincidences / (n * (n - 1))


	def entropy_order0(tokens: list[str]) -> float:
	    """Return the unigram (order-0) Shannon entropy of *tokens* in bits.

	    An empty sequence yields ``0.0``.  The sum is accumulated by repeated
	    subtraction from ``0.0`` so that a single-symbol corpus returns a plain
	    ``0.0`` rather than ``-0.0`` (which would be displayed as "-0.000").
	    """
	    if not tokens:
	        return 0.0
	    n = len(tokens)
	    h = 0.0
	    for count in collections.Counter(tokens).values():
	        p = count / n
	        h -= p * math.log2(p)
	    return h


	def entropy_order1(tokens: list[str]) -> float:
	    """Return the first-order conditional entropy H(X|Y) in bits.

	    This is the entropy of a token given the token immediately before it,
	    estimated from the bigram counts of *tokens*.  Sequences shorter than
	    two tokens yield ``0.0``.
	    """
	    if len(tokens) < 2:
	        return 0.0
	    pairs = list(zip(tokens[:-1], tokens[1:]))
	    pair_counts = collections.Counter(pairs)
	    # Contexts are counted over tokens[:-1] only: the final token never
	    # starts a bigram, so it must not contribute to a context total.
	    context_counts = collections.Counter(tokens[:-1])
	    total_pairs = len(pairs)
	    h = 0.0
	    for (prev, _), cnt in pair_counts.items():
	        p_pair = cnt / total_pairs
	        p_next_given_prev = cnt / context_counts[prev]
	        h -= p_pair * math.log2(p_next_given_prev)
	    return h


	def entropy_order2(tokens: list[str]) -> float:
	    """Return the second-order conditional entropy H(X|YZ) in bits.

	    This is the entropy of a token given the two tokens immediately before
	    it, estimated from the trigram counts of *tokens*.  Sequences shorter
	    than three tokens yield ``0.0``.
	    """
	    if len(tokens) < 3:
	        return 0.0
	    triples = list(zip(tokens[:-2], tokens[1:-1], tokens[2:]))
	    triple_counts = collections.Counter(triples)
	    # Two-token contexts are counted only where a third token follows.
	    context_counts = collections.Counter(zip(tokens[:-2], tokens[1:-1]))
	    total_triples = len(triples)
	    h = 0.0
	    for (first, second, _), cnt in triple_counts.items():
	        p_triple = cnt / total_triples
	        p_next_given_context = cnt / context_counts[(first, second)]
	        h -= p_triple * math.log2(p_next_given_context)
	    return h


	def zipf_slope(tokens: list[str]) -> float:
	    """Return the least-squares slope of the log-log rank/frequency plot.

	    Frequencies are sorted in descending order and paired with ranks
	    1..k; a straight line is fitted to (log rank, log frequency).  Fewer
	    than two distinct tokens yields ``0.0``.  (A slope near -1 is the
	    classic Zipf signature of natural language.)
	    """
	    frequencies = sorted(collections.Counter(tokens).values(), reverse=True)
	    if len(frequencies) < 2:
	        return 0.0
	    log_rank = np.log(np.arange(1, len(frequencies) + 1))
	    log_freq = np.log(np.array(frequencies, dtype=float))
	    coefficients = np.polyfit(log_rank, log_freq, 1)
	    # polyfit returns the highest-degree coefficient first: [slope, intercept].
	    return coefficients[0]


	def type_token_ratio(words: list[str]) -> float:
	    """Return distinct words divided by total words (``0.0`` if empty)."""
	    return len(set(words)) / len(words) if words else 0.0


	def hapax_ratio(words: list[str]) -> float:
	    """Return the share of distinct words that occur exactly once.

	    The denominator is the number of word *types*, not tokens.  An empty
	    list yields ``0.0``.
	    """
	    if not words:
	        return 0.0
	    counts = collections.Counter(words)
	    singletons = [word for word, occurrences in counts.items() if occurrences == 1]
	    return len(singletons) / len(counts)


	def pmi_top_pairs(tokens: list[str], top_n: int = 10) -> list[tuple[tuple, float]]:
	    """Return the *top_n* adjacent token pairs with the highest PMI.

	    PMI is ``log2(p(a, b) / (p(a) * p(b)))`` where ``p(a, b)`` is the
	    relative frequency of the bigram and ``p(a)``, ``p(b)`` are unigram
	    relative frequencies.  The result is a list of ``((a, b), score)``
	    sorted by descending score; fewer than two tokens yields ``[]``.
	    """
	    n = len(tokens)
	    if n < 2:
	        return []
	    pair_counts = collections.Counter(zip(tokens[:-1], tokens[1:]))
	    token_counts = collections.Counter(tokens)
	    n_pairs = n - 1
	    scored = []
	    for pair, cnt in pair_counts.items():
	        first, second = pair
	        p_joint = cnt / n_pairs
	        p_first = token_counts[first] / n
	        p_second = token_counts[second] / n
	        if p_first > 0 and p_second > 0 and p_joint > 0:
	            scored.append((pair, math.log2(p_joint / (p_first * p_second))))
	    # A stable descending sort keeps first-seen order among equal scores.
	    scored.sort(key=lambda item: item[1], reverse=True)
	    return scored[:top_n]


	def compute_perplexity(test_tokens: list[str], train_tokens: list[str], k: float = 0.5) -> float:
	    """Return the perplexity of *test_tokens* under a bigram model of *train_tokens*.

	    The model uses add-k smoothing:
	    ``P(b | a) = (count(a, b) + k) / (count(a) + k * V)``.
	    ``V`` is the size of the combined train and test vocabulary, so the
	    conditional distribution still sums to one over symbols that occur in
	    the test text but never in the training text.

	    Args:
	        test_tokens: Token sequence to score.
	        train_tokens: Token sequence the bigram counts are taken from.
	        k: Smoothing constant added to every bigram count; must be positive.

	    Returns:
	        ``2 ** cross_entropy`` over the test bigrams, or ``inf`` when either
	        sequence has fewer than two tokens.

	    Raises:
	        ValueError: If *k* is not positive.
	    """
	    if len(test_tokens) < 2 or len(train_tokens) < 2:
	        return float("inf")
	    if k <= 0:
	        raise ValueError("smoothing constant k must be positive")

	    bigram_counts = collections.Counter(zip(train_tokens[:-1], train_tokens[1:]))
	    # Context counts exclude the last training token, which starts no bigram.
	    context_counts = collections.Counter(train_tokens[:-1])
	    vocab_size = len(set(train_tokens) | set(test_tokens))

	    test_bigrams = list(zip(test_tokens[:-1], test_tokens[1:]))
	    log_prob = 0.0
	    for prev, nxt in test_bigrams:
	        numerator = bigram_counts.get((prev, nxt), 0) + k
	        denominator = context_counts.get(prev, 0) + k * vocab_size
	        log_prob += math.log2(numerator / denominator)

	    cross_entropy = -log_prob / len(test_bigrams)
	    return 2 ** cross_entropy


	def build_stats(tokens: list[str], words: list[str], label: str) -> dict:
	    """Collect every per-corpus metric into one dictionary.

	    Args:
	        tokens: Character tokens of the corpus.
	        words: Word tokens of the corpus.
	        label: Display name stored under ``"label"``.

	    Returns:
	        A dict holding sizes, vocabulary sizes, IoC, the three entropies,
	        type/token and hapax ratios, the Zipf slope, and the raw ``tokens``
	        and ``words`` lists themselves.
	    """
	    stats = {"label": label}
	    stats["n_chars"] = len(tokens)
	    stats["n_words"] = len(words)
	    stats["vocab_chars"] = len(set(tokens))
	    stats["vocab_words"] = len(set(words))
	    stats["ioc"] = ioc(tokens)
	    stats["h0"] = entropy_order0(tokens)
	    stats["h1"] = entropy_order1(tokens)
	    stats["h2"] = entropy_order2(tokens)
	    stats["ttr"] = type_token_ratio(words)
	    stats["hapax"] = hapax_ratio(words)
	    # The Zipf fit is only attempted with more than ten word tokens.
	    if len(words) > 10:
	        stats["zipf_slope"] = zipf_slope(words)
	    else:
	        stats["zipf_slope"] = 0.0
	    stats["tokens"] = tokens
	    stats["words"] = words
	    return stats


	def distance_vector(stats: dict, voynich_stats: dict) -> float:
	    """Return the Euclidean distance between two corpora in metric space.

	    The six coordinates are IoC, H0, H1, H2, type/token ratio and hapax
	    ratio, used as-is (no per-feature scaling).
	    """
	    deltas = [
	        voynich_stats[key] - stats[key]
	        for key in ("ioc", "h0", "h1", "h2", "ttr", "hapax")
	    ]
	    return float(np.linalg.norm(np.array(deltas)))


	# ─────────────────────────────────────────────
	# PLOTTING FUNCTIONS
	# ─────────────────────────────────────────────

	# Shared dark-theme palette used by every plotting helper below.
	VOYNICH_COLOR = "#e8c97a"  # series colour for the Voynich corpus
	# One colour per comparison language, indexed by its position in the selection.
	LANG_COLORS = ["#7ab8e8", "#e87a7a", "#7ae8a5", "#c87ae8", "#e8a57a"]
	BG_COLOR = "#0f0f14"  # figure background
	PANEL_COLOR = "#16161f"  # axes background
	TEXT_COLOR = "#d4cfc8"  # titles, labels and tick text
	GRID_COLOR = "#2a2a38"  # grid lines and axes spines


	def fig_to_pil(fig) -> PILImage.Image:
	    """Render a matplotlib figure to a PIL image and close the figure.

	    The figure is saved as PNG into an in-memory buffer.  It is closed in
	    a ``finally`` clause so that a failed render does not leave the figure
	    registered with pyplot.

	    Args:
	        fig: The matplotlib figure to render.

	    Returns:
	        An independent copy of the rendered image.
	    """
	    buf = io.BytesIO()
	    try:
	        fig.savefig(buf, format="png", dpi=140, bbox_inches="tight",
	                    facecolor=BG_COLOR, edgecolor="none")
	    finally:
	        plt.close(fig)
	    buf.seek(0)
	    # copy() gives an image that does not depend on the buffer afterwards.
	    return PILImage.open(buf).copy()


	def style_ax(ax, title=""):
	    """Apply the shared dark theme to *ax* and optionally set its title.

	    Sets the panel background, tick and axis-label colours, spine colour
	    and grid; a title is only written when *title* is non-empty.
	    """
	    ax.set_facecolor(PANEL_COLOR)
	    ax.tick_params(colors=TEXT_COLOR, labelsize=8)
	    for axis in (ax.xaxis, ax.yaxis):
	        axis.label.set_color(TEXT_COLOR)
	    for spine in ax.spines.values():
	        spine.set_edgecolor(GRID_COLOR)
	    ax.grid(True, color=GRID_COLOR, linewidth=0.5, alpha=0.7)
	    if not title:
	        return
	    ax.set_title(title, color=TEXT_COLOR, fontsize=9, fontweight="bold", pad=6)


	def plot_ioc_comparison(voynich_stats, lang_stats_list, selected_langs):
	    """Draw a horizontal bar chart of IoC for Voynich and each language.

	    Two fixed reference bars ("Random" at 0.038, "English ref" at 0.065)
	    are appended, and the same two levels are drawn as dashed vertical
	    lines.  Returns the chart as a PIL image.
	    """
	    # (level, colour, bar label, legend label) for the two reference values.
	    references = [
	        (0.038, "#555566", "Random", "Random (0.038)"),
	        (0.065, "#445544", "English ref", "English (0.065)"),
	    ]
	    labels = ["Voynich", *selected_langs] + [ref[2] for ref in references]
	    values = [voynich_stats["ioc"], *(ls["ioc"] for ls in lang_stats_list)]
	    values += [ref[0] for ref in references]
	    colors = [VOYNICH_COLOR, *LANG_COLORS[:len(selected_langs)]]
	    colors += [ref[1] for ref in references]

	    fig, ax = plt.subplots(figsize=(9, 4), facecolor=BG_COLOR)
	    bars = ax.barh(labels, values, color=colors, height=0.55, edgecolor="none")
	    for level, line_color, _, legend_label in references:
	        ax.axvline(level, color=line_color, lw=1, ls="--", alpha=0.6, label=legend_label)
	    ax.set_xlabel("Index of Coincidence", color=TEXT_COLOR)
	    style_ax(ax, "Index of Coincidence")
	    ax.legend(fontsize=7, labelcolor=TEXT_COLOR, facecolor=PANEL_COLOR, edgecolor=GRID_COLOR)

	    # Print each value just to the right of its bar.
	    for bar, value in zip(bars, values):
	        y_mid = bar.get_y() + bar.get_height() / 2
	        ax.text(value + 0.001, y_mid, f"{value:.4f}",
	                va="center", color=TEXT_COLOR, fontsize=7.5)
	    fig.tight_layout(pad=1.2)
	    return fig_to_pil(fig)


	def plot_entropy_curves(voynich_stats, lang_stats_list, selected_langs):
	    """Plot H0, H1 and H2 for Voynich and each comparison language.

	    Voynich is drawn as a solid line, each language as a dashed line.
	    Language colours cycle through ``LANG_COLORS`` so that selecting more
	    languages than there are palette entries cannot raise ``IndexError``.
	    Returns the chart as a PIL image.
	    """
	    fig, ax = plt.subplots(figsize=(9, 4.5), facecolor=BG_COLOR)
	    orders = [0, 1, 2]
	    order_labels = ["H₀ (unigram)", "H₁ (bigram)", "H₂ (trigram)"]
	    entropy_keys = ("h0", "h1", "h2")

	    voynich_curve = [voynich_stats[key] for key in entropy_keys]
	    ax.plot(orders, voynich_curve, "o-", color=VOYNICH_COLOR, lw=2.2, ms=7,
	            label="Voynich", zorder=5)

	    for i, (ls, lang) in enumerate(zip(lang_stats_list, selected_langs)):
	        lang_curve = [ls[key] for key in entropy_keys]
	        ax.plot(orders, lang_curve, "o--", color=LANG_COLORS[i % len(LANG_COLORS)],
	                lw=1.6, ms=5, label=lang, alpha=0.85)

	    ax.set_xticks(orders)
	    ax.set_xticklabels(order_labels, color=TEXT_COLOR, fontsize=8)
	    ax.set_ylabel("Entropy (bits)", color=TEXT_COLOR)
	    style_ax(ax, "Entropy Curves (H₀ → H₁ → H₂)")
	    ax.legend(fontsize=7.5, labelcolor=TEXT_COLOR, facecolor=PANEL_COLOR, edgecolor=GRID_COLOR)
	    fig.tight_layout(pad=1.2)
	    return fig_to_pil(fig)


	def plot_zipf(voynich_stats, lang_stats_list, selected_langs):
	    """Draw one log-log rank/frequency panel per corpus.

	    The first panel is Voynich, followed by one panel per selected
	    language.  Every panel is titled and themed, including panels whose
	    corpus is empty or has too few word types for a line fit; the fitted
	    slope is shown in the title only when a fit was made.  Language colours
	    cycle through ``LANG_COLORS``.  Returns the chart as a PIL image.
	    """
	    n_plots = len(selected_langs) + 1
	    # squeeze=False always returns a 2-D array of axes, so the single-panel
	    # case needs no special handling.
	    fig, axes_grid = plt.subplots(1, n_plots,
	                                  figsize=(3 * n_plots, 4),
	                                  facecolor=BG_COLOR,
	                                  squeeze=False)
	    axes = axes_grid[0]

	    def _draw_zipf(ax, words, color, label):
	        """Plot the rank/frequency points of *words* and, if possible, a fit."""
	        counts = sorted(collections.Counter(words).values(), reverse=True)
	        title = label
	        if counts:
	            ranks = np.arange(1, len(counts) + 1)
	            ax.loglog(ranks, counts, ".", color=color, ms=3, alpha=0.7)
	            if len(counts) > 2:
	                lr = np.log(ranks)
	                lc = np.log(np.array(counts, dtype=float))
	                slope, intercept = np.polyfit(lr, lc, 1)
	                fit = np.exp(intercept + slope * lr)
	                ax.loglog(ranks, fit, "-", color=color, lw=1.5, alpha=0.5)
	                title = f"{label}\nslope={slope:.2f}"
	        ax.set_title(title, color=TEXT_COLOR, fontsize=8, pad=4)
	        style_ax(ax)

	    _draw_zipf(axes[0], voynich_stats["words"], VOYNICH_COLOR, "Voynich")
	    for i, (ls, lang) in enumerate(zip(lang_stats_list, selected_langs)):
	        _draw_zipf(axes[i + 1], ls["words"], LANG_COLORS[i % len(LANG_COLORS)], lang)

	    fig.suptitle("Zipf Word-Frequency Plots (log-log)", color=TEXT_COLOR, fontsize=9, y=1.01)
	    fig.tight_layout(pad=1.2)
	    return fig_to_pil(fig)


	def plot_distance_radar(voynich_stats, lang_stats_list, selected_langs):
	    """Draw a radar chart of six metrics, min-max normalised per metric.

	    Each metric (IoC, H0, H1, H2, TTR, hapax) is rescaled across all
	    plotted corpora so the smallest value maps to 0 and the largest to
	    roughly 1.  Language colours cycle through ``LANG_COLORS`` so extra
	    languages cannot raise ``IndexError``.  Returns the chart as a PIL image.
	    """
	    features = ["ioc", "h0", "h1", "h2", "ttr", "hapax"]
	    feat_labels = ["IoC", "H₀", "H₁", "H₂", "TTR", "Hapax"]
	    n_features = len(features)
	    angles = [n / float(n_features) * 2 * math.pi for n in range(n_features)]
	    angles += angles[:1]  # repeat the first angle to close the polygon

	    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={"polar": True}, facecolor=BG_COLOR)
	    ax.set_facecolor(PANEL_COLOR)

	    all_stats = [voynich_stats] + lang_stats_list
	    mins = {f: min(s[f] for s in all_stats) for f in features}
	    # The small epsilon keeps the range non-zero when all corpora agree.
	    maxs = {f: max(s[f] for s in all_stats) + 1e-10 for f in features}

	    def norm(stats, f):
	        """Rescale metric *f* of *stats* into the shared 0..1 range."""
	        return (stats[f] - mins[f]) / (maxs[f] - mins[f])

	    vy = [norm(voynich_stats, f) for f in features]
	    vy += vy[:1]
	    ax.plot(angles, vy, "-", color=VOYNICH_COLOR, lw=2.2, label="Voynich")
	    ax.fill(angles, vy, color=VOYNICH_COLOR, alpha=0.12)

	    for i, (ls, lang) in enumerate(zip(lang_stats_list, selected_langs)):
	        color = LANG_COLORS[i % len(LANG_COLORS)]
	        lv = [norm(ls, f) for f in features]
	        lv += lv[:1]
	        ax.plot(angles, lv, "--", color=color, lw=1.6, label=lang, alpha=0.85)
	        ax.fill(angles, lv, color=color, alpha=0.05)

	    ax.set_xticks(angles[:-1])
	    ax.set_xticklabels(feat_labels, color=TEXT_COLOR, fontsize=9)
	    ax.tick_params(colors=TEXT_COLOR)
	    ax.yaxis.set_tick_params(colors=GRID_COLOR)
	    ax.grid(color=GRID_COLOR, linewidth=0.5)
	    ax.spines["polar"].set_color(GRID_COLOR)
	    ax.set_title("Metric Radar (normalized)", color=TEXT_COLOR, fontsize=9, pad=15)
	    ax.legend(loc="upper right", bbox_to_anchor=(1.35, 1.15),
	              fontsize=7.5, labelcolor=TEXT_COLOR, facecolor=PANEL_COLOR, edgecolor=GRID_COLOR)
	    fig.tight_layout()
	    return fig_to_pil(fig)


	def plot_perplexity(voynich_tokens, lang_stats_list, selected_langs):
	    """Draw a bar chart of Voynich perplexity under each language model.

	    Each bar is the perplexity of the Voynich tokens under a bigram model
	    built from one language's tokens, capped at 9999 for display.  Returns
	    the chart as a PIL image.
	    """
	    perplexities = [
	        min(compute_perplexity(voynich_tokens, ls["tokens"]), 9999)
	        for ls in lang_stats_list
	    ]

	    fig, ax = plt.subplots(figsize=(8, 4), facecolor=BG_COLOR)
	    bar_colors = LANG_COLORS[:len(selected_langs)]
	    bars = ax.bar(selected_langs, perplexities,
	                  color=bar_colors, edgecolor="none", width=0.55)
	    ax.set_ylabel("Perplexity (lower = more similar)", color=TEXT_COLOR)
	    style_ax(ax, "Cross-Entropy Perplexity of Voynich under Each Language Model")

	    # Print each value centred above its bar.
	    for bar, value in zip(bars, perplexities):
	        x_mid = bar.get_x() + bar.get_width() / 2
	        ax.text(x_mid, bar.get_height() + 5, f"{value:.1f}",
	                ha="center", color=TEXT_COLOR, fontsize=8)
	    fig.tight_layout(pad=1.2)
	    return fig_to_pil(fig)


	def plot_char_freq(voynich_stats, lang_stats_list, selected_langs):
	    """Draw a bar chart of the 20 most frequent characters per corpus.

	    Panels are laid out three per row.  The grid has at least two rows and
	    grows with the number of corpora, so none is silently left out; unused
	    panels are hidden.  Language colours cycle through ``LANG_COLORS``.
	    Returns the chart as a PIL image.
	    """
	    all_entries = [("Voynich", voynich_stats, VOYNICH_COLOR)] + [
	        (lang, ls, LANG_COLORS[i % len(LANG_COLORS)])
	        for i, (lang, ls) in enumerate(zip(selected_langs, lang_stats_list))
	    ]

	    n_cols = 3
	    # Two rows (six panels) is the minimum; add rows for larger selections.
	    n_rows = max(2, math.ceil(len(all_entries) / n_cols))
	    fig, axes = plt.subplots(n_rows, n_cols, figsize=(14, 3.5 * n_rows), facecolor=BG_COLOR)
	    axes = axes.flatten()

	    for ax, (label, stats, color) in zip(axes, all_entries):
	        top = collections.Counter(stats["tokens"]).most_common(20)
	        chars, counts = zip(*top) if top else ([], [])
	        ax.bar(chars, counts, color=color, alpha=0.85, edgecolor="none")
	        style_ax(ax, f"{label} — top 20 chars")
	        ax.set_xlabel("Character", color=TEXT_COLOR)
	        ax.set_ylabel("Count", color=TEXT_COLOR)

	    for ax in axes[len(all_entries):]:
	        ax.set_visible(False)

	    fig.suptitle("Character Frequency Distributions", color=TEXT_COLOR, fontsize=10, y=1.01)
	    fig.tight_layout(pad=1.5)
	    return fig_to_pil(fig)


	# ─────────────────────────────────────────────
	# SUMMARY TABLE
	# ─────────────────────────────────────────────

	def build_summary_html(voynich_stats, lang_stats_list, selected_langs):
	    """Render the metrics table for Voynich and every selected language.

	    The Voynich row comes first and is highlighted; its Distance and
	    Perplexity cells hold a dash because those columns compare a language
	    against Voynich.  Perplexity is capped at 9999 for display.  Returns
	    the table as an HTML string.
	    """
	    th_style = "background:#1e1e2e;color:#c9a84c;padding:7px 12px;border:1px solid #2a2a38;font-size:12px;"
	    td_style = "padding:6px 12px;border:1px solid #2a2a38;color:#d4cfc8;font-size:11px;text-align:center;"
	    td_voynich = "padding:6px 12px;border:1px solid #2a2a38;color:#e8c97a;font-size:11px;text-align:center;font-weight:bold;background:#1a1a22;"

	    rows = []
	    entries = [("Voynich", voynich_stats), *zip(selected_langs, lang_stats_list)]
	    for label, stats in entries:
	        if label == "Voynich":
	            dist_str = perp_str = "—"
	        else:
	            dist_str = f"{distance_vector(stats, voynich_stats):.4f}"
	            perplexity = compute_perplexity(voynich_stats["tokens"], stats["tokens"])
	            perp_str = f"{min(perplexity, 9999):.1f}"
	        rows.append({
	            "Corpus": label,
	            "Chars": stats["n_chars"],
	            "Words": stats["n_words"],
	            "IoC": f"{stats['ioc']:.4f}",
	            "H₀": f"{stats['h0']:.3f}",
	            "H₁": f"{stats['h1']:.3f}",
	            "H₂": f"{stats['h2']:.3f}",
	            "TTR": f"{stats['ttr']:.3f}",
	            "Hapax": f"{stats['hapax']:.3f}",
	            "Zipf slope": f"{stats['zipf_slope']:.3f}",
	            "Distance": dist_str,
	            "Perplexity": perp_str,
	        })

	    cols = list(rows[0])
	    header_cells = "".join(f"<th style='{th_style}'>{c}</th>" for c in cols)

	    body_rows = []
	    for row in rows:
	        cell_style = td_voynich if row["Corpus"] == "Voynich" else td_style
	        cells = "".join(f"<td style='{cell_style}'>{row[c]}</td>" for c in cols)
	        body_rows.append("<tr>" + cells + "</tr>")
	    body = "".join(body_rows)

	    return (
	        '<table style="border-collapse:collapse;width:100%;background:#0f0f14;">'
	        + "<thead><tr>" + header_cells + "</tr></thead>"
	        + "<tbody>" + body + "</tbody></table>"
	    )


	def build_ranking_html(voynich_stats, lang_stats_list, selected_langs):
	    """Render two side-by-side rankings of the languages against Voynich.

	    The left column orders languages by metric distance, the right column
	    by perplexity (capped at 9999); smaller is closer in both.  Returns the
	    block as an HTML string.
	    """
	    medals = ["🥇", "🥈", "🥉", "4th", "5th"]

	    def place(i):
	        """Return the medal or ordinal shown for zero-based position *i*."""
	        return medals[i] if i < 5 else str(i + 1)

	    scored = [
	        (
	            lang,
	            distance_vector(ls, voynich_stats),
	            min(compute_perplexity(voynich_stats["tokens"], ls["tokens"]), 9999),
	        )
	        for lang, ls in zip(selected_langs, lang_stats_list)
	    ]
	    by_distance = sorted(scored, key=lambda entry: entry[1])
	    by_perplexity = sorted(scored, key=lambda entry: entry[2])

	    parts = ['<div style="display:flex;gap:24px;flex-wrap:wrap;">']

	    parts.append('<div style="flex:1;min-width:240px;">')
	    parts.append('<h3 style="color:#c9a84c;font-size:13px;margin-bottom:8px;">Closest by Metric Distance</h3>')
	    for i, (lang, dist, _) in enumerate(by_distance):
	        parts.append(f'<div style="margin:4px 0;color:#d4cfc8;font-size:12px;">{place(i)} <b>{lang}</b> — dist={dist:.4f}</div>')
	    parts.append("</div>")

	    parts.append('<div style="flex:1;min-width:240px;">')
	    parts.append('<h3 style="color:#c9a84c;font-size:13px;margin-bottom:8px;">Closest by Perplexity</h3>')
	    for i, (lang, _, perp) in enumerate(by_perplexity):
	        parts.append(f'<div style="margin:4px 0;color:#d4cfc8;font-size:12px;">{place(i)} <b>{lang}</b> — perp={perp:.1f}</div>')
	    parts.append("</div>")

	    parts.append("</div>")
	    return "".join(parts)


	# ─────────────────────────────────────────────
	# MAIN ANALYSIS PIPELINE
	# ─────────────────────────────────────────────

	def _error_outputs(message: str) -> list:
	    """Build the 8-item output list that shows *message* in the summary pane."""
	    return [None, None, None, None, None, None,
	            f"<p style='color:#e87a7a'>{message}</p>", ""]


	def run_analysis(
	    uploaded_file,
	    use_sample: bool,
	    bigraph_rules_text: str,
	    selected_langs: list[str],
	    apply_splits: bool,
	):
	    """Run the full analysis and return the eight values the UI displays.

	    Args:
	        uploaded_file: Path of the uploaded text file (an object exposing
	            the path as ``.name`` is also accepted), or ``None``.
	        use_sample: Whether to fall back to ``SAMPLE_EVA`` when nothing
	            was uploaded.
	        bigraph_rules_text: Rule text in the ``xy -> x y`` format.
	        selected_langs: Names of the comparison languages.
	        apply_splits: Whether the bigraph rules are applied.

	    Returns:
	        A list of eight items: six PIL images (IoC, entropy, Zipf, radar,
	        perplexity, character frequencies), the summary-table HTML, and the
	        ranking-plus-PMI HTML.  On invalid input or an unreadable file the
	        six images are ``None`` and the seventh item is an error message.
	    """
	    if uploaded_file is None and not use_sample:
	        return _error_outputs("Please upload a file or enable the sample corpus.")

	    if not selected_langs:
	        return _error_outputs("Please select at least one comparison language.")

	    # 1. Load Voynich text
	    if uploaded_file is not None:
	        path = getattr(uploaded_file, "name", uploaded_file)
	        try:
	            with open(path, "r", encoding="utf-8", errors="replace") as f:
	                raw_voynich = f.read()
	        except OSError as exc:
	            # Report the failure in the UI instead of raising from the handler.
	            reason = exc.strerror or type(exc).__name__
	            return _error_outputs(f"Could not read the uploaded file: {reason}.")
	    else:
	        raw_voynich = SAMPLE_EVA

	    # 2. Apply bigraph splits (optionally)
	    rules = parse_bigraph_rules(bigraph_rules_text) if apply_splits else []
	    processed_voynich = apply_bigraph_splits(raw_voynich, rules) if rules else raw_voynich

	    # 3. Tokenize
	    vy_tokens = tokenize(processed_voynich)
	    # Words come from the unmodified text: split rules such as "ch -> c h"
	    # insert spaces, which would break every affected word into fragments
	    # and distort the word-level metrics against the reference corpora,
	    # which are never split.
	    vy_words = tokenize_words(raw_voynich)

	    if not vy_tokens:
	        return _error_outputs("Could not extract tokens from the text. Check input format.")

	    voynich_stats = build_stats(vy_tokens, vy_words, "Voynich")

	    # 4. Process each selected language
	    lang_stats_list = []
	    for lang in selected_langs:
	        corpus = LANGUAGE_CORPORA.get(lang, "")
	        lang_stats_list.append(build_stats(tokenize(corpus), tokenize_words(corpus), lang))

	    # 5. Produce all plots — each returns a PIL Image (no temp files needed)
	    ioc_img = plot_ioc_comparison(voynich_stats, lang_stats_list, selected_langs)
	    entropy_img = plot_entropy_curves(voynich_stats, lang_stats_list, selected_langs)
	    zipf_img = plot_zipf(voynich_stats, lang_stats_list, selected_langs)
	    radar_img = plot_distance_radar(voynich_stats, lang_stats_list, selected_langs)
	    perp_img = plot_perplexity(vy_tokens, lang_stats_list, selected_langs)
	    freq_img = plot_char_freq(voynich_stats, lang_stats_list, selected_langs)

	    # 6. Summary table + ranking
	    summary_html = build_summary_html(voynich_stats, lang_stats_list, selected_langs)
	    ranking_html = build_ranking_html(voynich_stats, lang_stats_list, selected_langs)

	    # 7. PMI info block
	    pmi_chips = "".join(
	        f'<span style="background:#1e1e2e;border:1px solid #2a2a38;padding:3px 8px;'
	        f'border-radius:4px;color:#d4cfc8;font-size:11px;">'
	        f'{a}+{b} <b style="color:#e8c97a">{score:.2f}</b></span>'
	        for (a, b), score in pmi_top_pairs(vy_tokens, top_n=15)
	    )
	    pmi_html = (
	        '<h3 style="color:#c9a84c;font-size:13px;">Top PMI Bigram Pairs (Voynich)</h3>'
	        '<div style="display:flex;flex-wrap:wrap;gap:8px;">'
	        + pmi_chips
	        + "</div>"
	    )

	    return [ioc_img, entropy_img, zipf_img, radar_img, perp_img, freq_img,
	            summary_html, ranking_html + "<br>" + pmi_html]


	# ─────────────────────────────────────────────
	# GRADIO UI
	# ─────────────────────────────────────────────

	# Stylesheet passed to gr.Blocks(css=...) in create_app(): dark background,
	# gold accents, a serif body font and a monospace font for inputs/buttons.
	# NOTE(review): the @import pulls fonts from fonts.googleapis.com at page
	# load, and the ".gr-*" selectors presumably target Gradio's own class
	# names — confirm they match the installed Gradio version.
	CUSTOM_CSS = """
	@import url('https://fonts.googleapis.com/css2?family=IM+Fell+English:ital@0;1&family=JetBrains+Mono:wght@400;600&display=swap');

	body, .gradio-container {
	background: #0f0f14 !important;
	color: #d4cfc8 !important;
	font-family: 'IM Fell English', serif !important;
	}

	h1, h2, h3 { color: #e8c97a !important; letter-spacing: 0.04em; }

	.gr-panel, .gr-box, .gr-form { background: #13131b !important; border-color: #2a2a38 !important; }

	.gr-button {
	background: #c9a84c !important;
	color: #0f0f14 !important;
	border: none !important;
	font-family: 'JetBrains Mono', monospace !important;
	font-weight: 600 !important;
	letter-spacing: 0.05em;
	border-radius: 3px !important;
	}
	.gr-button:hover { background: #e8c97a !important; }

	.gr-check-radio { accent-color: #c9a84c !important; }

	textarea, input[type=text] {
	background: #16161f !important;
	color: #d4cfc8 !important;
	border: 1px solid #2a2a38 !important;
	font-family: 'JetBrains Mono', monospace !important;
	font-size: 12px !important;
	}

	label { color: #b8b0a8 !important; font-size: 13px !important; }

	.gr-tab-item { color: #c9a84c !important; border-color: #2a2a38 !important; }
	.gr-tab-item.selected { background: #1e1e2e !important; }

	.gr-image img { border-radius: 4px; border: 1px solid #2a2a38; }

	#header-block {
	text-align: center;
	padding: 24px 0 12px;
	border-bottom: 1px solid #2a2a38;
	margin-bottom: 16px;
	}
	#header-block h1 { font-size: 2rem; margin-bottom: 4px; }
	#header-block p { color: #888; font-size: 0.9rem; font-style: italic; }
	"""

	# Page banner rendered at the top of the app via gr.HTML(); the
	# "header-block" id is the hook styled by the #header-block rules in
	# CUSTOM_CSS.
	HEADER_HTML = """
	<div id="header-block">
	<h1>⚗ Voynich Linguistic Analyzer</h1>
	<p>EVA transliteration · statistical cryptolinguistics · language comparison</p>
	</div>
	"""


	def create_app():
	    """Build and return the Gradio Blocks interface.

	    The left column holds the inputs (file upload, sample toggle, bigraph
	    rules, language selection, run button); the right column holds a
	    Summary tab plus one image tab per plot.  The run button is wired to
	    ``run_analysis``, whose eight return values fill the six images, the
	    metrics table and the ranking pane, in that order.
	    """
	    # (tab title, image label) for the six plot tabs, in output order.
	    image_tab_specs = [
	        ("📈 Index of Coincidence", "IoC Comparison"),
	        ("🌀 Entropy Curves", "Entropy Curves"),
	        ("📉 Zipf Plots", "Zipf Word-Frequency"),
	        ("🎯 Radar Chart", "Metric Radar"),
	        ("🔮 Perplexity", "Cross-Entropy Perplexity"),
	        ("🔤 Char Frequencies", "Character Frequency Distributions"),
	    ]

	    with gr.Blocks(css=CUSTOM_CSS, title="Voynich Analyzer") as demo:

	        gr.HTML(HEADER_HTML)

	        with gr.Row():
	            # ── LEFT PANEL: Controls ──────────────────────────────
	            with gr.Column(scale=1, min_width=280):
	                gr.Markdown("### 📂 Input")
	                file_input = gr.File(
	                    label="Upload EVA transliteration (.txt)",
	                    file_types=[".txt"],
	                    type="filepath",
	                )
	                sample_toggle = gr.Checkbox(
	                    label="Use built-in sample corpus (fallback if no upload)",
	                    value=True,
	                )

	                gr.Markdown("### ✂️ Bigraph Splitting")
	                splits_toggle = gr.Checkbox(label="Apply bigraph splitting", value=True)
	                rules_box = gr.Textbox(
	                    label="Rules (format: xy -> x y, one per line)",
	                    value=DEFAULT_BIGRAPH_RULES,
	                    lines=10,
	                )

	                gr.Markdown("### 🌍 Comparison Languages")
	                # Every language is offered and pre-selected.
	                language_picker = gr.CheckboxGroup(
	                    choices=list(LANGUAGE_CORPORA.keys()),
	                    value=list(LANGUAGE_CORPORA.keys()),
	                    label="Languages to compare",
	                )

	                run_btn = gr.Button("▶ Run Analysis", variant="primary")

	            # ── RIGHT PANEL: Results ─────────────────────────────
	            with gr.Column(scale=3):
	                with gr.Tabs():
	                    with gr.Tab("📊 Summary"):
	                        ranking_out = gr.HTML(label="Ranking")
	                        summary_out = gr.HTML(label="Metrics Table")

	                    # Plots are passed around as PIL images, so no files
	                    # are written for them.
	                    image_outputs = []
	                    for tab_title, image_label in image_tab_specs:
	                        with gr.Tab(tab_title):
	                            image_outputs.append(gr.Image(label=image_label, type="pil"))

	        # run_analysis returns its eight values in exactly this output order.
	        run_btn.click(
	            fn=run_analysis,
	            inputs=[file_input, sample_toggle, rules_box, language_picker, splits_toggle],
	            outputs=image_outputs + [summary_out, ranking_out],
	        )

	    return demo


	if __name__ == "__main__":
	app = create_app()
	# launch() is called without a host or port, leaving both at Gradio's
	# defaults; per the original author's note, Hugging Face Spaces supplies
	# its own server configuration.
	app.launch()