Spaces:
Sleeping
Sleeping
| """ | |
| Swedish Text Complexity Analyzer | |
| Uppsala NLP - Hugging Face Space | |
| """ | |
| import gradio as gr | |
| import math | |
| import re | |
# A word is a run of Latin letters including the Swedish/accented letters listed.
_WORD_RE = re.compile(r'[a-zA-ZåäöÅÄÖéÉüÜ]+')
# A sentence boundary is one or more terminal punctuation marks.
_SENTENCE_END_RE = re.compile(r'[.!?]+')

# (exclusive upper LIX bound, label); scores at or above the last bound fall
# through to _LIX_TOP_CATEGORY.
_LIX_CATEGORIES = (
    (25, "Mycket lätt / Very Easy"),
    (30, "Lätt / Easy"),
    (40, "Medel / Medium"),
    (50, "Svår / Difficult"),
    (60, "Mycket svår / Very Difficult"),
)
_LIX_TOP_CATEGORY = "Extremt svår / Extremely Difficult"


def calculate_lix(text):
    """Calculate LIX and other Swedish readability metrics.

    Args:
        text: The text to analyze.

    Returns:
        A dict mapping display labels to values (LIX score, category, OVIX,
        average sentence/word length, long-word percentage and raw counts),
        or ``None`` when the text contains no recognizable words.
    """
    # Tokenize words and sentences from the same lower-cased text so both
    # counts are derived from identical input.
    lowered = text.lower()
    words = _WORD_RE.findall(lowered)

    # Only fragments that contain at least one word count as sentences.
    # Otherwise pieces such as "123" in "Hej. 123." (or stray punctuation)
    # inflate the sentence count and distort the LIX score.
    sentences = [
        fragment
        for fragment in _SENTENCE_END_RE.split(lowered)
        if _WORD_RE.search(fragment)
    ]

    if not words or not sentences:
        return None

    num_words = len(words)
    num_sentences = len(sentences)
    num_long_words = sum(1 for w in words if len(w) > 6)
    num_unique = len(set(words))
    num_chars = sum(len(w) for w in words)

    avg_sentence_length = num_words / num_sentences
    long_word_pct = num_long_words * 100 / num_words

    # LIX = average sentence length + percentage of long words (> 6 letters).
    lix = avg_sentence_length + long_word_pct

    # OVIX = log(tokens) / log(2 - log(types) / log(tokens)).
    # It is only computed for texts of at least 10 words with more than one
    # distinct word. When every word is unique the inner term equals 1 and
    # log(1) == 0 would divide by zero, so 0 is reported as a placeholder.
    ovix = 0
    if num_words >= 10 and num_unique > 1:
        log_tokens = math.log(num_words)
        log_types = math.log(num_unique)
        denom = 2 - (log_types / log_tokens)
        if denom > 1:
            ovix = log_tokens / math.log(denom)

    # Category: first band whose upper bound exceeds the score.
    cat = next(
        (label for bound, label in _LIX_CATEGORIES if lix < bound),
        _LIX_TOP_CATEGORY,
    )

    return {
        "LIX Score": round(lix, 1),
        "Category": cat,
        "OVIX (Lexical Variation)": round(ovix, 1),
        "Avg Sentence Length": round(avg_sentence_length, 1),
        "Avg Word Length": round(num_chars / num_words, 2),
        "Long Words (>6 chars)": f"{round(long_word_pct, 1)}%",
        "Total Words": num_words,
        "Total Sentences": num_sentences,
        "Unique Words": num_unique,
    }
def analyze_text(text):
    """Build a Markdown readability report for ``text``.

    Args:
        text: The text to analyze; ``None`` is treated like empty input.

    Returns:
        A Markdown string with the LIX headline, a table of every metric
        returned by ``calculate_lix`` and a static LIX scale reference, or a
        short plain-text message when the input is empty or not analyzable.
    """
    # Guard against a missing value as well as blank text so the handler
    # returns a friendly message instead of raising AttributeError on None.
    if not text or not text.strip():
        return "Please enter some Swedish text to analyze."

    results = calculate_lix(text)
    if not results:
        return "Could not analyze text. Please enter valid Swedish text."

    # Collect the pieces and join once instead of repeated string +=.
    parts = [
        "## 📊 Analysis Results\n\n",
        f"### LIX: {results['LIX Score']} ({results['Category']})\n\n",
        "| Metric | Value |\n|--------|-------|\n",
    ]
    parts.extend(f"| {k} | {v} |\n" for k, v in results.items())
    parts.extend([
        "\n### 📖 LIX Scale Reference\n",
        "| Score | Level | Example |\n|-------|-------|--------|\n",
        "| < 25 | Very Easy | Children's books |\n",
        "| 25-30 | Easy | Simple fiction |\n",
        "| 30-40 | Medium | Newspapers |\n",
        "| 40-50 | Difficult | Official documents |\n",
        "| 50-60 | Very Difficult | Academic texts |\n",
        "| > 60 | Extremely Difficult | Legal/technical |\n",
    ])
    return "".join(parts)
# Sample inputs offered in the UI, ordered from simple to complex prose.
# Each row is a one-element list holding the text for the single input field.
examples = [
    [sample]
    for sample in (
        "Solen skiner. Kalle går ut. Han ser en hund. Hunden är glad. De leker tillsammans i parken.",
        "Regeringen presenterade igår ett nytt förslag om klimatåtgärder. Enligt statsministern ska Sverige minska sina utsläpp med femtio procent till år 2030.",
        "Den epistemologiska problematiken kring vetenskaplig objektivitet har genomgått betydande transformationer under det senaste århundradet. Poststrukturalistiska perspektiv har ifrågasatt fundamentala antaganden om kunskapsproduktionens neutralitet.",
    )
]
# Multi-line text box where the user pastes the text to analyze.
_text_input = gr.Textbox(
    label="Swedish Text",
    placeholder="Enter Swedish text here...",
    lines=8,
)

# The report returned by analyze_text is Markdown, so render it as such.
_results_view = gr.Markdown(label="Results")

# Markdown blurb shown under the title.
_DESCRIPTION = """
Analyze the readability of Swedish text using established linguistic metrics.
**Metrics:**
- **LIX** (Läsbarhetsindex): Standard Swedish readability formula by Carl-Hugo Björnsson (1968)
- **OVIX** (Ordvariationsindex): Lexical variation measure
Part of [Uppsala NLP](https://huggingface.co/UppsalaNLP) research tools.
"""

# Single-function web UI: one text input, one Markdown output.
demo = gr.Interface(
    fn=analyze_text,
    inputs=_text_input,
    outputs=_results_view,
    title="🇸🇪 Swedish Text Complexity Analyzer",
    description=_DESCRIPTION,
    examples=examples,
    theme=gr.themes.Soft(),
)


# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()