# Spaces:
# Sleeping
# Sleeping
| import re | |
| import gradio as gr | |
| import unicodedata | |
# -----------------------------
# NORMALISATION UTILITIES
# -----------------------------
def normalize(text):
    """Return *text* lowercased with diacritics removed (NFD, drop 'Mn' marks)."""
    decomposed = unicodedata.normalize('NFD', text.lower())
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)
def build_norm_map(original):
    """Normalize *original* character by character, keeping an index map back.

    Returns (norm_text, mapping) where mapping[j] is the index in *original*
    of the character that produced norm_text[j].  Characters whose NFD form
    consists only of combining marks are dropped entirely.
    """
    out = []
    index_map = []
    for src_idx, ch in enumerate(original):
        # Keep only the base characters of this char's NFD expansion.
        bases = [c for c in unicodedata.normalize('NFD', ch)
                 if unicodedata.category(c) != 'Mn']
        for base in bases:
            out.append(base.lower())
            index_map.append(src_idx)
    return ''.join(out), index_map
def slice_original_from_norm_span(orig, map_norm, start_norm, end_norm):
    """Map a half-open [start_norm, end_norm) span of the normalized text
    back onto a substring of the original (accented) text."""
    first = map_norm[start_norm]
    last = map_norm[end_norm - 1]
    return orig[first:last + 1]
# -----------------------------
# EXHAUSTIVE LEXICON LISTS
# -----------------------------
# Adverbial intensifiers: amplify the word they modify.
intensificateurs = [
    "trop", "très", "si", "fort", "forte", "forts", "fortes",
    "bien", "furieusement", "intensément", "tout", "tant",
    "complètement", "pleinement", "violemment", "parfaitement",
    "totalement", "beaucoup", "énormément", "extrêmement",
    "hyper", "profondément", "follement", "absolument", "vraiment",
    "extraordinairement", "incroyablement", "terriblement",
    "particulièrement", "singulièrement",
    "prodigieusement", "excessivement", "vachement", "bigrement",
    "puissamment", "fortement", "hautement", "considérablement",
    "remarquablement",
]
# Moderators: mid-strength degree adverbs.
moderateurs = [
    "assez", "modérément", "moyennement", "quasi",
    "quasiment", "presque", "pas mal", "plutôt",
    "relativement", "légèrement", "quelque peu",
    "un tantinet",
]
# Attenuators: weaken the word they modify.
attenuateurs = [
    "peu", "à peine", "médiocrement", "passablement",
    "petitement", "un peu", "faiblement", "timidement",
    "un brin", "modiquement",
]
# Intensifying adjectives (all gender/number inflections listed explicitly).
# NOTE(review): "immenses" appears twice; harmless because the detectors
# dedup via set(), so the duplicate is kept to preserve the list as-is.
adj_intensificateurs = [
    "énorme", "énormes", "immense", "immenses", "extrême", "extrêmes",
    "suprême", "suprêmes", "immensurable", "immensurables",
    "profond", "profonde", "profondes", "profonds",
    "parfait", "parfaite", "parfaits", "parfaites",
    "irréprochable", "irréprochables",
    "implacable", "implacables",
    "insondable", "insondables",
    "infini", "infinie", "infinis", "infinies",
    "formidable", "formidables",
    "exceptionnel", "exceptionnelle", "exceptionnels", "exceptionnelles",
    "incroyable", "incroyables",
    "extraordinaire", "extraordinaires",
    "fantastique", "fantastiques",
    "gigantesque", "gigantesques",
    "monumental", "monumentale", "monumentales", "monumentaux",
    "colossal", "colossale", "colossaux", "colossales",
    "remarquable", "remarquables",
    "phénoménal", "phénoménale", "phénoménaux", "phénoménales",
    "immenses", "intense", "intenses",
    "absolu", "absolue", "absolus", "absolues",
    "titanesque", "titanesques",
    "herculéenne", "herculéen", "herculéens", "herculéennes",
    "appuyé", "appuyés", "appuyée",
    "maximal", "maximale", "maximaux", "maximales",
    "optimal", "optimale", "optimaux", "optimales", "appuyées",
    "démesuré", "démesurée", "démesurés", "démesurées",
    "accablants", "accablantes",
    "furieux", "furieuse", "furieuses", "frappant", "frappante",
    "fulgurante", "fulgurant", "fulgurants", "fulgurantes",
    "considérable", "considérables",
    "élevé", "élevée", "élevés", "élevées", "accablant", "accablante",
]
# Attenuating adjectives.
adj_attenuateurs = [
    "infime", "infimes", "minime", "minimes",
    "minuscules", "minuscule",
    "minimale", "minimal", "minimales", "minimaux",
]
# -----------------------------
# PREFIXES
# -----------------------------
# Derivational prefixes that mark intensity on a following stem.
prefixes_intensifs = ["archi", "extra", "super", "hyper", "sur", "mega"]
prefixes_attenuateurs = ["sous", "hypo", "infra", "mini"]
# -----------------------------
# EXCLUSIONS
# -----------------------------
# Phrases that contain intensifier-looking words but are NOT modalisers
# (e.g. conjunction "bien que", conditional "si ..."); their spans are
# claimed first so no detector can match inside them.
exclusions = [
    "plus tard", "plus de vingt ans", "tout à fait",
    "énormément de modalisateurs", "bien que", "bien qu",
    "s'il", "si elle", "si le", "si la", "si les", "si un", "si une",
    "si des", "si nous", "si vous", "si je", "si tu", "si elles", "si ils",
]
# -----------------------------
# PHRASEOLOGISMS (fixed multiword expressions)
# -----------------------------
# Intensifying idioms, with common conjugated forms listed explicitly.
phras_intensifs = [
    # avoir le cafard (all persons/tenses used here)
    "avoir le cafard", "ai le cafard", "as le cafard", "a le cafard",
    "avons le cafard", "avez le cafard", "ont le cafard",
    "avais le cafard", "avait le cafard", "avions le cafard", "aviez le cafard", "avaient le cafard",
    "aurai le cafard", "aura le cafard", "aurons le cafard", "aurez le cafard", "auront le cafard",
    "aurais le cafard", "aurait le cafard", "aurions le cafard", "auriez le cafard", "auraient le cafard",
    # être au septième ciel
    "être au septième ciel", "suis au septième ciel", "es au septième ciel", "est au septième ciel",
    "sommes au septième ciel", "êtes au septième ciel", "sont au septième ciel",
    "étais au septième ciel", "était au septième ciel", "étions au septième ciel", "étiez au septième ciel", "étaient au septième ciel",
    "serai au septième ciel", "sera au septième ciel", "serons au septième ciel", "serez au septième ciel", "seront au septième ciel",
    "serais au septième ciel", "serait au septième ciel", "serions au septième ciel", "seriez au septième ciel", "seraient au septième ciel",
    # se tordre de douleur
    "se tordre de douleur", "me tords de douleur", "te tords de douleur", "se tord de douleur",
    "nous nous tordons de douleur", "vous vous tordez de douleur", "se tordent de douleur",
    # mourir de rire / faim / froid
    "mourir de rire", "meurs de rire", "meurt de rire", "mourons de rire", "mourez de rire", "meurent de rire",
    "mourir de faim", "meurs de faim", "meurt de faim", "mourons de faim", "mourez de faim", "meurent de faim",
    "mourir de froid", "meurs de froid", "meurt de froid", "mourons de froid", "mourez de froid", "meurent de froid",
    # brûler / trembler
    "brûler d'amour", "brûle d'amour", "brûlait d'amour", "brûlerai d'amour",
    "brûler d'impatience", "brûle d'impatience", "brûlait d'impatience", "brûlerai d'impatience",
    "trembler de peur", "tremble de peur", "tremblait de peur", "tremblerai de peur",
    "être transporté de joie", "suis transporté de joie", "est transporté de joie", "sommes transportés de joie",
    # fixed intensive locutions and comparisons
    "à verse", "à couper le souffle", "à mort", "à fond", "à pas de géant", "plein à craquer", "laid à pleurer", "hors de lui",
    "au comble de la joie", "au comble de la performance", "fièvre de cheval", "froideur de glace", "faim de loup",
    "froid de canard", "appétit d'orge", "patience d'ange", "comme une souche", "comme un trou", "comme un putois",
    "avoir le cœur en capilotade", "à bout de force", "à bout de souffle", "se traîner comme une limace", "timide comme une souris",
    "froid comme un glaçon", "d'une lenteur d'escargot", "faible comme une plume", "avoir les jambes en coton", "mal en point",
    "pluie diluvienne", "force herculéenne", "froid sibérien", "à gorge déployée", "à toute allure", "à toute vitesse", "de toutes forces",
    "fort comme un turc", "rouge comme une tomate", "malade comme un chien",
]
# Moderating idioms.
phras_mod = [
    "un peu comme", "dans une certaine mesure", "à moitié chemin", "à la limite", "mettre la main à la pâte",
]
# Attenuating idioms.
phras_att = [
    "appétit d'oiseau", "force de moineau", "de faible envergure", "de moins en moins",
]
# -----------------------------
# COMPARATIVES & SUPERLATIVES
# -----------------------------
# Intensifying comparatives/superlatives ("plus que", "le meilleur", ...).
comp_super_intens = [
    "plus que", "plus de", "meilleur", "meilleure", "meilleurs", "meilleures", "mieux",
    "le plus", "la plus", "les plus",
    "le meilleur", "la meilleure", "les meilleurs", "les meilleures",
    "le mieux",
]
# Attenuating comparatives/superlatives ("moins que", "le moindre", ...).
comp_super_att = [
    "moins que", "moins de", "moindre", "moindres", "le moins", "la moindre", "les moindres",
]
# Equality comparatives ("aussi ... que", "autant ...").
comp_super_mod = ["aussi que", "autant que", "autant de"]
# -----------------------------
# NORMALISATION
# -----------------------------
def build_norm(items):
    """Return the accent-stripped, lowercased form of every entry in *items*."""
    return list(map(normalize, items))

# Pre-normalized lexicons: all matching happens on normalized text, so each
# list is normalized once at import time.
norm_intens = build_norm(intensificateurs + adj_intensificateurs)
norm_mod = build_norm(moderateurs + comp_super_mod)
norm_att = build_norm(attenuateurs + adj_attenuateurs + comp_super_att)
norm_phras_int = build_norm(phras_intensifs)
norm_phras_mod = build_norm(phras_mod)
norm_phras_att = build_norm(phras_att)
norm_adj_int = build_norm(adj_intensificateurs)
# Prefixes are plain ASCII, so lowercasing alone is sufficient here.
norm_prefixes_int = [prefix.lower() for prefix in prefixes_intensifs]
norm_exclusions = build_norm(exclusions)
norm_comp_int = build_norm(comp_super_intens)
norm_comp_att = build_norm(comp_super_att)
# -----------------------------
# DETECTION FUNCTIONS
# -----------------------------
def detect_multiword_with_gaps(text_norm, map_norm, orig, phrase_norm, max_gaps=3):
    r"""Locate *phrase_norm* inside *text_norm*, tolerating filler words.

    Up to *max_gaps* extra words may appear between two consecutive words of
    the phrase (e.g. "pas mal" also matches "pas vraiment mal").

    Returns a list of (start_norm, end_norm, original_substring) tuples; the
    substring is recovered from *orig* through the *map_norm* index map.

    Bug fix: the separator between phrase words used to be ``\s*``, which
    allows ZERO whitespace — so "pas mal" matched inside the single token
    "pasmal", and a filler word could be glued to the next phrase word.
    The separator is now a mandatory ``\s+``.
    """
    matches = []
    parts = phrase_norm.split()
    if not parts:
        return matches
    escaped = [re.escape(p) for p in parts]
    # 0..max_gaps filler words, then at least one whitespace char before the
    # next phrase word.
    gap = r'(?:\s+\w+){0,' + str(max_gaps) + r'}\s+'
    pattern = re.compile(r'\b' + gap.join(escaped) + r'\b')
    for m in pattern.finditer(text_norm):
        s, e = m.start(), m.end()
        # Map the normalized span back onto the original (accented) text.
        start_orig = map_norm[s]
        end_orig = map_norm[e - 1] + 1
        matches.append((s, e, orig[start_orig:end_orig]))
    return matches
def detect_words_general(text_norm, map_norm, orig, norm_items, occupied):
    """Match every entry of *norm_items* against *text_norm*, longest first.

    Spans already claimed in *occupied* are skipped; newly matched spans are
    claimed so shorter / later items cannot re-match them.  Returns a list of
    (start_norm, end_norm, original_substring) tuples.
    """
    found = []
    # Longest entries first so multiword expressions beat their sub-words.
    for entry in sorted(set(norm_items), key=len, reverse=True):
        if not entry:
            continue
        for start, end, source_text in detect_multiword_with_gaps(
                text_norm, map_norm, orig, entry, max_gaps=3):
            if any(occupied[start:end]):
                continue
            occupied[start:end] = [True] * (end - start)
            found.append((start, end, source_text))
    return found
def detect_un_peu_and_peu(text_norm, map_norm, orig, occupied):
    """Detect "un peu" before bare "peu" so the longer phrase wins.

    "un peu" spans are claimed first; a bare "peu" falling inside an already
    claimed span (including a claimed "un peu") is skipped.
    """
    found = []
    for pattern in (r'\bun\s+peu\b', r'\bpeu\b'):
        for m in re.finditer(pattern, text_norm):
            start, end = m.span()
            if any(occupied[start:end]):
                continue
            occupied[start:end] = [True] * (end - start)
            found.append((start, end,
                          slice_original_from_norm_span(orig, map_norm, start, end)))
    return found
def detect_bien_plus_adj(text_norm, map_norm, orig, occupied, adj_norm_list):
    """Detect intensifier "bien" followed (within 2 filler words) by an adjective.

    Occurrences of "bien que" (concessive conjunction, not an intensifier)
    are skipped.  NOTE(review): the inner search scans the WHOLE text for
    every occurrence of "bien" instead of anchoring at that occurrence;
    duplicates are only suppressed through *occupied*.  Kept as-is to
    preserve behavior.
    """
    found = []
    for bien in re.finditer(r'\bbien\b', text_norm):
        # Skip the conjunction "bien que".
        if re.match(r'\bbien\s+que\b', text_norm[bien.start():]):
            continue
        for adjective in adj_norm_list:
            rx = r'\bbien(?:\s+\w+){0,2}\s+' + re.escape(adjective) + r'\b'
            for hit in re.finditer(rx, text_norm):
                start, end = hit.span()
                if any(occupied[start:end]):
                    continue
                occupied[start:end] = [True] * (end - start)
                found.append((start, end,
                              slice_original_from_norm_span(orig, map_norm, start, end)))
    return found
def detect_prefixed_adjectives(text_norm, map_norm, orig, occupied, prefixes_norm, adj_norm_list):
    """Flag tokens beginning with an intensifying prefix (e.g. "hyper-", "archi-").

    NOTE(review): *adj_norm_list* is accepted but never consulted, so ANY
    token starting with a prefix and at least 2 chars longer is flagged
    (e.g. the prefix "sur" also matches "surtout").  Kept as-is to preserve
    behavior — confirm intent before tightening.
    """
    found = []
    for token_match in re.finditer(r'\b\w+\b', text_norm):
        start, end = token_match.span()
        if any(occupied[start:end]):
            continue
        token = text_norm[start:end]
        for prefix in prefixes_norm:
            # Require at least two extra characters beyond the prefix itself.
            if token.startswith(prefix) and len(token) > len(prefix) + 1:
                occupied[start:end] = [True] * (end - start)
                found.append((start, end,
                              slice_original_from_norm_span(orig, map_norm, start, end)))
                break
    return found
def detect_comp_plus_minus_aussi_with_adj(text_norm, map_norm, orig, occupied):
    """Detect comparative ("plus ...", "moins ...") and superlative expressions.

    Returns a dict with keys 'plus', 'moins', 'aussi', 'comp', 'super';
    only 'comp' and 'super' are populated (the other keys are kept so the
    returned shape stays compatible with callers).
    """
    res = {'plus': [], 'moins': [], 'aussi': [], 'comp': [], 'super': []}
    # "plus"/"moins" followed by one or two words -> comparatives.
    for rx in (r'\bplus(?:\s+\w+){1,2}\b', r'\bmoins(?:\s+\w+){1,2}\b'):
        for m in re.finditer(rx, text_norm):
            start, end = m.span()
            if any(occupied[start:end]):
                continue
            occupied[start:end] = [True] * (end - start)
            res['comp'].append((start, end,
                                slice_original_from_norm_span(orig, map_norm, start, end)))
    # Fixed superlative expressions (tight matching: at most one filler word).
    for entry in norm_comp_int + norm_comp_att:
        for start, end, txt in detect_multiword_with_gaps(
                text_norm, map_norm, orig, entry, max_gaps=1):
            if any(occupied[start:end]):
                continue
            occupied[start:end] = [True] * (end - start)
            res['super'].append((start, end, txt))
    return res
def detect_exclusions(text_norm, map_norm, orig, exclusions_norm, occupied):
    """Pre-claim every span matching an exclusion phrase so that no later
    detector can highlight inside it.  Mutates *occupied* in place and
    returns nothing."""
    for phrase in exclusions_norm:
        if not phrase:
            continue
        for start, end, _ in detect_multiword_with_gaps(
                text_norm, map_norm, orig, phrase, max_gaps=1):
            occupied[start:end] = [True] * (end - start)
# -----------------------------
# MAIN ANALYSER
# -----------------------------
def analyse_text(text):
    """Analyse *text* for intensity modalisers and return a Markdown report.

    Pipeline: build an accent-insensitive normalized view with an index map
    back to the original, claim exclusion spans first, run the detectors
    from most to least specific (idioms, 'un peu'/'peu', 'bien + adj',
    prefixes, comparatives/superlatives, then single-word lexicon), and
    finally render a highlighted HTML preview plus per-category lists,
    translation notes and a global modalisation degree.
    """
    if not text or not text.strip():
        return "Entrez un texte valide."
    orig = text
    text_norm, map_norm = build_norm_map(orig)
    occupied = [False] * max(1, len(text_norm))

    # Exclusions claim their spans first so they can never be highlighted.
    detect_exclusions(text_norm, map_norm, orig, norm_exclusions, occupied)
    # Phraseologisms (multiword idioms) before single-word lexicon matches.
    phras_int = detect_words_general(text_norm, map_norm, orig, norm_phras_int, occupied)
    phras_mod = detect_words_general(text_norm, map_norm, orig, norm_phras_mod, occupied)
    phras_att = detect_words_general(text_norm, map_norm, orig, norm_phras_att, occupied)
    # 'un peu' must win over bare 'peu'.
    un_peu_matches = detect_un_peu_and_peu(text_norm, map_norm, orig, occupied)
    # 'bien + adjective' (but not the conjunction 'bien que').
    bien_adj_matches = detect_bien_plus_adj(text_norm, map_norm, orig, occupied, norm_adj_int)
    # Prefixed forms (hyper-, archi-, ...).
    prefixed_matches = detect_prefixed_adjectives(text_norm, map_norm, orig, occupied,
                                                  norm_prefixes_int, norm_adj_int)
    # Comparatives & superlatives.
    comp_matches = detect_comp_plus_minus_aussi_with_adj(text_norm, map_norm, orig, occupied)
    # General single-word / short lexicon entries last.
    general_intens = detect_words_general(text_norm, map_norm, orig, norm_intens, occupied)
    general_mod = detect_words_general(text_norm, map_norm, orig, norm_mod, occupied)
    general_att = detect_words_general(text_norm, map_norm, orig, norm_att, occupied)

    # Group detections per display category.
    intens_detect = general_intens + bien_adj_matches + prefixed_matches
    moder_detect = general_mod
    atten_detect = general_att + un_peu_matches
    comp_detect = comp_matches['comp']
    super_detect = comp_matches['super']

    # Ordered highlight list: (start_norm, end_norm, category, matched_text).
    highlights = []
    for s, e, txt in intens_detect: highlights.append((s, e, 'intensifs', txt))
    for s, e, txt in moder_detect: highlights.append((s, e, 'moder', txt))
    for s, e, txt in atten_detect: highlights.append((s, e, 'atten', txt))
    for s, e, txt in phras_int: highlights.append((s, e, 'phras_int', txt))
    for s, e, txt in phras_mod: highlights.append((s, e, 'phras_mod', txt))
    for s, e, txt in phras_att: highlights.append((s, e, 'phras_att', txt))
    for s, e, txt in comp_detect: highlights.append((s, e, 'comp', txt))
    for s, e, txt in super_detect: highlights.append((s, e, 'super', txt))
    highlights.sort(key=lambda x: x[0])

    # HTML highlighting of the original text.
    html_out = ""
    last = 0
    colors = {
        "intensifs": "background-color:#ff4d4d;",
        "moder": "background-color:#a6e1ff;",
        "atten": "background-color:#d9d9d9;",
        "phras_int": "background-color:#ffb3b3;",
        "phras_mod": "background-color:#cfe9ff;",
        "phras_att": "background-color:#f0f0f0;",
        "comp": "background-color:#ffff99;",   # yellow
        "super": "background-color:#ff9933;"   # orange
    }

    def esc(s):
        # Bug fix: the previous replacements were identity no-ops
        # ("&" -> "&", "<" -> "<", ">" -> ">"), so raw markup characters
        # leaked unescaped into the generated HTML.
        return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    for s_norm, e_norm, cat, orig_sub in highlights:
        try:
            s_o = map_norm[s_norm]
            e_o = map_norm[e_norm - 1] + 1
            html_out += esc(orig[last:s_o])
            html_out += f"<span style='{colors[cat]} padding:2px 4px; border-radius:4px'>{esc(orig[s_o:e_o])}</span>"
            last = e_o
        except (IndexError, KeyError):
            # Defensive: an unmappable span stops the highlighting; emit the
            # rest unstyled.  (Was a bare `except:` — narrowed to the errors
            # the indexing above can actually raise.)
            html_out += esc(orig[last:])
            last = len(orig)
            break
    html_out += esc(orig[last:])

    # Unique surface forms per category, first-seen order preserved.
    def uniq(seq):
        seen = set()
        out = []
        for s, e, txt in seq:
            if txt not in seen:
                seen.add(txt)
                out.append(txt)
        return out

    intens_list = uniq(intens_detect)
    moder_list = uniq(moder_detect)
    atten_list = uniq(atten_detect)
    phras_int_list = [t[2] for t in phras_int]
    phras_mod_list = [t[2] for t in phras_mod]
    phras_att_list = [t[2] for t in phras_att]
    comp_list = uniq(comp_detect)
    super_list = uniq(super_detect)

    # Translation advice for each detected phraseologism.
    translate_notes = []
    for idi in phras_int_list:
        translate_notes.append(f"- « {idi} » (intensificateur) — traduire avec précaution pour préserver la force expressive.")
    for idi in phras_mod_list:
        translate_notes.append(f"- « {idi} » (modérateur) — vérifier la nuance; une traduction littérale peut altérer le ton.")
    for idi in phras_att_list:
        translate_notes.append(f"- « {idi} » (atténuateur) — adapter selon le registre cible pour conserver l'effet atténuateur.")

    # Global modalisation degree, bucketed on the number of unique detections.
    total_occurrences = (len(intens_list) + len(moder_list) + len(atten_list)
                         + len(phras_int_list) + len(phras_mod_list) + len(phras_att_list)
                         + len(comp_list) + len(super_list))
    if total_occurrences <= 3:
        deg_modal = "faible"
    elif total_occurrences <= 6:
        deg_modal = "moyen"
    else:
        deg_modal = "fort"

    # Markdown/HTML report (user-facing French text kept verbatim).
    report_md = f"""
🎯 **RAPPORT D'ANALYSE — MODALISATION**
Aperçu (occurrences surlignées) 🔬 :
<div style="padding:10px;border:1px solid #ddd;border-radius:6px;font-size:14px">{html_out}</div>
---
💥 **Intensificateurs détectés ({len(intens_list)}):**
{', '.join(intens_list) if intens_list else 'Aucun'}
⚖️ **Modérateurs détectés ({len(moder_list)}):**
{', '.join(moder_list) if moder_list else 'Aucun'}
🌫️ **Atténuateurs détectés ({len(atten_list)}):**
{', '.join(atten_list) if atten_list else 'Aucun'}
🔶 **Comparatifs détectés ({len(comp_list)}):**
{', '.join(comp_list) if comp_list else 'Aucun'}
🟠 **Superlatifs détectés ({len(super_list)}):**
{', '.join(super_list) if super_list else 'Aucun'}
---
🔎 **Phraséologismes (séparés par sens):**
🔥 Intensificateurs ({len(phras_int_list)}): {', '.join(phras_int_list) if phras_int_list else 'Aucun'}
🌱 Modérateurs ({len(phras_mod_list)}): {', '.join(phras_mod_list) if phras_mod_list else 'Aucun'}
🌧️ Atténuateurs ({len(phras_att_list)}): {', '.join(phras_att_list) if phras_att_list else 'Aucun'}
---
📝 **Notes et conseils de traduction**
{chr(10).join(translate_notes) if translate_notes else 'Aucun phraséologisme détecté.'}
---
📊 **Degré de la modalisation intensive du texte :** {deg_modal} (total occurrences : {total_occurrences})
---
🛠️ **Note technique**
Détection agile : comparatifs (jaune) et superlatifs (orange) intégrés, préfixes traités (ex. hyper-), distinctions 'bien + adj' / 'bien que' et 'un peu' / 'peu' gérées.
"""
    return report_md
# -----------------------------
# GRADIO INTERFACE
# -----------------------------
# User-facing description (French, kept verbatim).
APP_DESCRIPTION = (
    "Cette application détecte et surligne les modalisateurs (intensificateurs, modérateurs et atténuateurs) "
    "ainsi que les phraséologismes. Elle calcule ensuite le degré de la modalisation intensive du texte saisi et fournit "
    "des notes et conseils pour l'équivalence traductive."
)

iface = gr.Interface(
    fn=analyse_text,
    inputs=gr.Textbox(
        label="Entrez le texte à analyser🖊️",
        lines=15,
        placeholder="Collez votre texte ici...",
    ),
    outputs=gr.Markdown(label="Rapport"),
    title="Analyseur de modalisateurs et d'intensité textuelle🌡️",
    description=APP_DESCRIPTION,
)

if __name__ == "__main__":
    iface.launch()