resources / index.html
beshiribrahim's picture
Upload index.html
3db7bdf verified
Raw
History Blame Contribute Delete
40.4 kB
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- Google tag (gtag.js): the first script loads the gtag.js library asynchronously
for ID G-0GLBPVSQDG. The inline script reuses window.dataLayer if it already exists
(otherwise starts an empty array), defines gtag() to push its arguments object onto
that array, then queues a 'js' call with the current Date and a 'config' call for the
same ID. -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-0GLBPVSQDG"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-0GLBPVSQDG');
</script>
<!-- ============================================================
SEO — replace https://beittigreai-resources.static.hf.space/ with your real URL once hosted
(the same URL is repeated in canonical, og:url, og:image and twitter:image below)
============================================================ -->
<title>Tigre Language NLP Resources — Datasets, Models, Dictionaries &amp; Tools</title>
<meta name="description" content="A curated index of open datasets, machine-translation and ASR models, dictionaries, corpora, and tools for the Tigre language (ISO 639-3: tig), a Semitic language of Eritrea and eastern Sudan written in Ge'ez script.">
<meta name="keywords" content="Tigre language, tig, Tigre NLP, low-resource language, Ge'ez, Eritrea, Sudan, Tigre dataset, Tigre machine translation, Tigre ASR, Tigre dictionary, BeitTigreAI, Semitic languages, language technology">
<meta name="author" content="BeitTigreAI">
<meta name="robots" content="index, follow, max-image-preview:large">
<link rel="canonical" href="https://beittigreai-resources.static.hf.space/">
<!-- Favicons (BeitTigreAI mark) -->
<link rel="icon" href="favicon-v2.ico" sizes="any">
<link rel="icon" type="image/png" sizes="32x32" href="favicon-32x32-v2.png">
<link rel="icon" type="image/png" sizes="16x16" href="favicon-16x16-v2.png">
<link rel="apple-touch-icon" sizes="180x180" href="apple-touch-icon-v2.png">
<!-- Google Search Console ownership verification token (one tag only; no placeholder) -->
<meta name="google-site-verification" content="KNCaxaIlgTJD-fPfXV44F_tXt5vlhEzj7o7Im4FJ50M">
<!-- Open Graph (Facebook, LinkedIn, etc.) -->
<meta property="og:type" content="website">
<meta property="og:title" content="Tigre Language NLP Resources — Datasets, Models, Dictionaries &amp; Tools">
<meta property="og:description" content="A curated index of open datasets, models, dictionaries, and tools for the Tigre language (ISO 639-3: tig).">
<meta property="og:url" content="https://beittigreai-resources.static.hf.space/">
<meta property="og:image" content="https://beittigreai-resources.static.hf.space/preview.png">
<meta property="og:locale" content="en_US">
<meta property="og:site_name" content="Tigre Language Digitization Effort">
<!-- Twitter / X card -->
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:title" content="Tigre Language NLP Resources">
<meta name="twitter:description" content="Open datasets, models, dictionaries, and tools for the Tigre language (ISO 639-3: tig).">
<meta name="twitter:image" content="https://beittigreai-resources.static.hf.space/preview.png">
<!-- Structured data (schema.org JSON-LD): a CollectionPage about the Tigre language
whose mainEntity is an ItemList of four Dataset records hosted on Hugging Face -->
<script type="application/ld+json">
{
  "@context": "https://schema.org",
  "@type": "CollectionPage",
  "name": "Tigre Language Digitization Effort — Resource Index",
  "description": "Curated index of open datasets, models, dictionaries, corpora, and tools for the Tigre language (ISO 639-3: tig).",
  "url": "https://beittigreai-resources.static.hf.space/",
  "inLanguage": "en",
  "about": {
    "@type": "Language",
    "name": "Tigre",
    "alternateName": "tig"
  },
  "mainEntity": {
    "@type": "ItemList",
    "itemListElement": [
      {
        "@type": "Dataset",
        "name": "Tigre parallel multilingual corpus",
        "description": "Parallel corpus (~330k rows) pairing Tigre with several languages, for training and evaluating machine-translation models.",
        "url": "https://huggingface.co/datasets/BeitTigreAI/tigre-data-parallel-multilingual",
        "inLanguage": "tig",
        "keywords": [
          "Tigre",
          "machine translation",
          "parallel corpus",
          "low-resource NLP"
        ],
        "creator": {
          "@type": "Organization",
          "name": "BeitTigreAI"
        }
      },
      {
        "@type": "Dataset",
        "name": "Tigre text-to-speech training data",
        "description": "Paired audio/text resource (~6.77k rows) for training Tigre text-to-speech systems.",
        "url": "https://huggingface.co/datasets/BeitTigreAI/tigre-tts-training",
        "inLanguage": "tig",
        "keywords": [
          "Tigre",
          "text-to-speech",
          "TTS",
          "speech corpus"
        ],
        "creator": {
          "@type": "Organization",
          "name": "BeitTigreAI"
        }
      },
      {
        "@type": "Dataset",
        "name": "Tigre lexicon dataset",
        "description": "Tigre lexicon with ~420k entries, a vocabulary resource for NLP and linguistic research.",
        "url": "https://huggingface.co/datasets/BeitTigreAI/tigre-data-lexicon",
        "inLanguage": "tig",
        "keywords": [
          "Tigre",
          "lexicon",
          "dictionary",
          "vocabulary"
        ],
        "creator": {
          "@type": "Organization",
          "name": "BeitTigreAI"
        }
      },
      {
        "@type": "Dataset",
        "name": "Tigre Wikipedia text corpus",
        "description": "Monolingual Tigre text extracted from Wikipedia, for pretraining and language modeling.",
        "url": "https://huggingface.co/datasets/BeitTigreAI/tigre-data-wikipedia",
        "inLanguage": "tig",
        "keywords": [
          "Tigre",
          "Wikipedia",
          "monolingual corpus"
        ],
        "creator": {
          "@type": "Organization",
          "name": "BeitTigreAI"
        }
      }
    ]
  }
}
</script>
<!-- Web fonts: preconnect to both Google Fonts origins, then load Fraunces, Spline Sans and Cairo -->
<link href="https://fonts.googleapis.com" rel="preconnect">
<link href="https://fonts.gstatic.com" rel="preconnect" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Fraunces:opsz,wght@9..144,400;9..144,600;9..144,900&family=Spline+Sans:wght@400;500;600&family=Cairo:wght@400;600;700;900&display=swap">
<style>
/* Design tokens: colour custom properties referenced below through var() */
:root{
--ink:#0c3a4d;--ink-soft:#446e7e;--paper:#e9f5fc;--paper-2:#d6ecf8;
--card:#ffffff;--rust:#178a23;--rust-deep:#0e6817;--gold:#3ba3dd;
--green:#178a23;--blue:#3ba3dd;--line:#bfdcec;
}
/* Reset: border-box sizing and zero margin/padding on every element */
*{box-sizing:border-box;margin:0;padding:0}
html{scroll-behavior:smooth}
/* Page base: Spline Sans text on the --paper colour with three layered radial gradients */
body{
font-family:'Spline Sans',sans-serif;color:var(--ink);background:var(--paper);
background-image:radial-gradient(circle at 10% -5%,rgba(59,163,221,.18),transparent 45%),
radial-gradient(circle at 96% 4%,rgba(23,138,35,.12),transparent 40%),
radial-gradient(circle at 50% 120%,rgba(59,163,221,.14),transparent 48%);
line-height:1.42;-webkit-font-smoothing:antialiased;font-size:14px;
}
/* Arabic typography when active (the `ar` class is toggled on <body> by the language script) */
body.ar{font-family:'Cairo',sans-serif;line-height:1.6}
body.ar h1,body.ar h2,body.ar .name{font-family:'Cairo',sans-serif}
body.ar h1{font-weight:900;letter-spacing:0}
body.ar h2{font-weight:700;letter-spacing:0}
body.ar .kicker{letter-spacing:0;text-transform:none}
body.ar .num{font-family:'Cairo',sans-serif}
/* Language toggle — fixed top-right in both directions */
.langtoggle{position:fixed;top:12px;right:14px;z-index:50;
font-family:'Cairo','Spline Sans',sans-serif;font-size:.8rem;font-weight:700;
background:var(--card);color:var(--blue);border:1.5px solid var(--blue);
border-radius:999px;padding:6px 16px;cursor:pointer;box-shadow:0 2px 8px rgba(12,58,77,.12);
transition:all .15s}
.langtoggle:hover{background:var(--blue);color:#fff}
/* Three equal stripes at the top of the header: blue, green, blue */
.flagbar{height:5px;display:flex;border-radius:3px;overflow:hidden;margin-bottom:14px}
.flagbar i{flex:1}
.flagbar i:nth-child(1){background:var(--blue)}
.flagbar i:nth-child(2){background:var(--green)}
.flagbar i:nth-child(3){background:var(--blue)}
/* Centred content column, at most 1080px wide */
.wrap{max-width:1080px;margin:0 auto;padding:0 22px}
/* Page header: kicker line, title (with the smaller Ge'ez-script span) and lede */
header{padding:30px 0 16px;border-bottom:2px solid var(--ink)}
.kicker{font-size:.66rem;letter-spacing:.3em;text-transform:uppercase;color:var(--rust);font-weight:600;margin-bottom:8px}
.head-flex{display:flex;justify-content:space-between;align-items:flex-end;gap:24px;flex-wrap:wrap}
h1{font-family:'Fraunces',serif;font-weight:900;font-size:clamp(1.9rem,4vw,2.7rem);line-height:.96;letter-spacing:-.02em}
h1 .geez{font-weight:400;font-size:.6em;color:var(--rust-deep);margin:0 .35em}
.lede{margin-top:8px;max-width:64ch;font-size:.9rem;color:var(--ink-soft)}
/* In-page section links rendered as small wrapping chips */
nav.toc{margin:12px 0 2px;display:flex;flex-wrap:wrap;gap:5px}
nav.toc a{font-size:.7rem;text-decoration:none;color:var(--ink);border:1px solid var(--line);background:var(--card);padding:4px 9px;border-radius:5px;transition:all .15s}
nav.toc a:hover{background:var(--blue);color:#fff;border-color:var(--blue)}
/* Section heading row: h2 followed by .rule, a 1px line that fills the remaining width */
section{padding:18px 0 2px}
.sec-head{display:flex;align-items:baseline;gap:10px;margin-bottom:10px}
/* Numbered badge placed before each entry title */
.num{font-family:'Fraunces',serif;font-weight:900;font-size:.8rem;color:#0c5a7d;background:#d6ecf8;border:1px solid var(--blue);border-radius:5px;min-width:1.9em;height:1.6em;display:inline-flex;align-items:center;justify-content:center;flex:0 0 auto}
h2{font-family:'Fraunces',serif;font-weight:600;font-size:1.18rem;letter-spacing:-.01em;line-height:1.05}
.sec-head .rule{flex:1;height:1px;background:var(--line);align-self:center}
/* Entry cards in a two-column grid (one column at 680px and below, see the media query further down) */
.grid{display:grid;grid-template-columns:1fr 1fr;gap:8px}
.entry{background:var(--card);border:1px solid var(--line);border-radius:8px;padding:10px 12px;transition:border-color .15s}
.entry:hover{border-color:var(--gold)}
.entry .title-line{display:flex;align-items:center;gap:7px;flex-wrap:wrap}
/* a.name is a linked title; span.name is a title without a link */
.entry a.name{font-family:'Fraunces',serif;font-weight:600;font-size:.98rem;color:var(--rust-deep);text-decoration:none;letter-spacing:-.01em;line-height:1.15}
.entry a.name:hover{text-decoration:underline;text-underline-offset:2px}
.entry span.name{font-family:'Fraunces',serif;font-weight:600;font-size:.98rem;color:var(--ink);letter-spacing:-.01em;line-height:1.15}
/* Status badges shown next to an entry title */
/* NOTE(review): .tag.private is not used by any markup visible in this file — confirm it is still needed */
.tag{font-size:.58rem;letter-spacing:.06em;text-transform:uppercase;padding:1px 6px;border-radius:4px;font-weight:600;white-space:nowrap}
body.ar .tag{letter-spacing:0;text-transform:none}
.tag.private{background:#dbeef9;color:#0c5a7d;border:1px solid #b6d9ee}
.tag.pending{background:#e3f3e6;color:#0e6817;border:1px solid #bfe3c6}
.tag.print{background:#dcf0e1;color:#0c6b1f;border:1px solid #b6e0c0}
.entry p{margin-top:3px;color:var(--ink-soft);font-size:.81rem;line-height:1.35}
/* NOTE(review): no <footer> element appears in the markup visible in this file — confirm this rule is still needed */
footer{margin-top:20px;padding:14px 0 24px;border-top:2px solid var(--ink);color:var(--ink-soft);font-size:.78rem}
/* Narrow screens: single-column grid */
@media(max-width:680px){.grid{grid-template-columns:1fr}}
/* Print: hide the toggle and TOC, drop the background, tighten spacing, keep each card on one page */
@media print{
.langtoggle{display:none}
body{font-size:11px;background:#fff;background-image:none}
nav.toc{display:none}
.entry:hover{border-color:var(--line)}
section{padding:10px 0 0}.grid{gap:5px}
.entry{padding:6px 8px;break-inside:avoid}
.sec-head{margin-bottom:6px}header{padding:14px 0 12px}
@page{margin:12mm}
}
</style>
</head>
<body>
<!-- Language toggle: the inline script at the end of <body> looks this button up by id="langToggle".
     type="button" is explicit so it can never act as a submit button. -->
<button class="langtoggle" id="langToggle" type="button" aria-label="التبديل إلى العربية">العربية</button>
<div class="wrap">
<header>
<div class="flagbar"><i></i><i></i><i></i></div>
<div class="kicker" data-en="Resource Index · ISO 639-3 · tig" data-ar="فهرس المصادر · ISO 639-3 · tig">Resource Index · ISO 639-3 · tig</div>
<div class="head-flex">
<div>
<!-- The Ge'ez-script name has no data-en/data-ar, so it is shown unchanged in both languages; lang="tig" marks it as Tigre -->
<h1><span class="title-text" data-en="The Tigre Language Digitization Effort" data-ar="مشروع رقمنة اللغة التِّجري">The Tigre Language Digitization Effort</span><span class="geez" lang="tig">ትግረ</span></h1>
<p class="lede" data-en="A linked index of datasets, tools, reference works, and models for Tigre — a Semitic language of Eritrea and eastern Sudan, written in the Ge'ez script." data-ar="فهرس مرتبط بمجموعات البيانات والأدوات والمراجع والنماذج للغة التِّجري — وهي لغة سامية تُستخدم في إريتريا وشرق السودان وتُكتب بالخط الجعزي.">A linked index of datasets, tools, reference works, and models for Tigre — a Semitic language of Eritrea and eastern Sudan, written in the Ge'ez script.</p>
</div>
</div>
<nav class="toc">
<a href="#data" data-en="Datasets" data-ar="البيانات">Datasets</a><a href="#speech" data-en="Speech" data-ar="الكلام">Speech</a><a href="#ref" data-en="Reference" data-ar="المراجع">Reference</a><a href="#web" data-en="Web" data-ar="الويب">Web</a><a href="#tools" data-en="Tools" data-ar="الأدوات">Tools</a><a href="#research" data-en="Research" data-ar="الأبحاث">Research</a><a href="#project" data-en="BeitTigreAI" data-ar="بيت تِجري">BeitTigreAI</a><a href="#models" data-en="HF Models" data-ar="نماذج HF">HF Models</a><a href="#hfdata" data-en="HF Datasets" data-ar="بيانات HF">HF Datasets</a>
</nav>
</header>
<section id="data">
<!-- data-en/data-ar are copied into the heading as plain text by the language toggle, so the ampersand is escaped once only -->
<div class="sec-head"><h2 data-en="Datasets &amp; Corpora" data-ar="مجموعات البيانات والمدوّنات">Datasets &amp; Corpora</h2><span class="rule"></span></div>
<div class="grid">
<div class="entry"><div class="title-line"><span class="num">1</span><a class="name" href="https://translate.google.com/" target="_blank" rel="noopener" data-en="Google Translate Dataset" data-ar="مجموعة بيانات ترجمة Google">Google Translate Dataset</a></div>
<p data-en="English–Tigre set of 8,000 translated sentences (800 easy, 3,134 intermediate, 4,000 long), for MT development (service to start soon)." data-ar="مجموعة إنجليزي–تِجري مكوّنة من 8000 جملة مترجمة (800 سهلة، 3134 متوسطة، 4000 طويلة) لتطوير الترجمة الآلية (الخدمة ستبدأ قريبًا).">English–Tigre set of 8,000 translated sentences (800 easy, 3,134 intermediate, 4,000 long), for MT development (service to start soon).</p></div>
<div class="entry"><div class="title-line"><span class="num">2</span><a class="name" href="https://tatoeba.org/en/sentences/show_all_in/tig/none" target="_blank" rel="noopener" data-en="tatoeba.org — Tigre" data-ar="tatoeba.org — التِّجري">tatoeba.org — Tigre</a></div>
<p data-en="Crowdsourced example sentences with translations. Tigre ranks 30th of 429 languages by sentence count." data-ar="جُمل أمثلة مع ترجماتها بمساهمة المجتمع. تحتل التِّجري المرتبة 30 من بين 429 لغة من حيث عدد الجُمل.">Crowdsourced example sentences with translations. Tigre ranks 30th of 429 languages by sentence count.</p></div>
<div class="entry"><div class="title-line"><span class="num">3</span><a class="name" href="https://huggingface.co/datasets/BeitTigreAI/tigre-data-parallel-multilingual" target="_blank" rel="noopener" data-en="Parallel multilingual corpus" data-ar="مدوّنة متوازية متعددة اللغات">Parallel multilingual corpus</a></div>
<p data-en="~330k rows pairing Tigre with several languages. Foundation for translation models and the dictionaries below." data-ar="نحو 330 ألف صف تربط التِّجري بعدة لغات. أساس لنماذج الترجمة والقواميس أدناه.">~330k rows pairing Tigre with several languages. Foundation for translation models and the dictionaries below.</p></div>
</div>
</section>
<section id="speech">
<div class="sec-head"><h2 data-en="Speech" data-ar="الكلام والصوت">Speech</h2><span class="rule"></span></div>
<div class="grid">
<div class="entry"><div class="title-line"><span class="num">4</span><a class="name" href="https://commonvoice.mozilla.org/tig/speak" target="_blank" rel="noopener" data-en="Mozilla Common Voice — Tigre" data-ar="Mozilla Common Voice — التِّجري">Mozilla Common Voice — Tigre</a></div>
<p data-en="Community speech-donation platform for collecting spoken Tigre. About 11 hours donated so far." data-ar="منصة مجتمعية للتبرع بالتسجيلات الصوتية لجمع اللغة التِّجري المنطوقة. تم التبرع بنحو 11 ساعة حتى الآن.">Community speech-donation platform for collecting spoken Tigre. About 11 hours donated so far.</p></div>
</div>
</section>
<section id="ref">
<div class="sec-head"><h2 data-en="Reference Works" data-ar="المراجع والقواميس">Reference Works</h2><span class="rule"></span></div>
<div class="grid">
<div class="entry"><div class="title-line"><span class="num">5</span><span class="name" data-en="Tigre Dictionary — M. Mussie Bekhit" data-ar="قاموس التِّجري — م. موسي بخيت">Tigre Dictionary — M. Mussie Bekhit</span><span class="tag pending" data-en="Now Being Published" data-ar="قيد النشر">Now Being Published</span></div>
<p data-en="First English–Tigrinya–Tigre dictionary with around 6,200 vocabulary entries." data-ar="أول قاموس إنجليزي–تجرينية–تِجري يضم نحو 6200 مدخل لغوي.">First English–Tigrinya–Tigre dictionary with around 6,200 vocabulary entries.</p></div>
<div class="entry"><div class="title-line"><span class="num">6</span><a class="name" href="https://beittigre.github.io/tigre-multilingual-dictionaries/" target="_blank" rel="noopener" data-en="Tigre Multilingual Dictionaries" data-ar="قواميس التِّجري متعددة اللغات">Tigre Multilingual Dictionaries</a></div>
<p data-en="Searchable phrasebooks: English (58k), Arabic (31k), German (27k), Swedish (15k). From the open parallel corpus." data-ar="كتب عبارات قابلة للبحث: الإنجليزية (58 ألف)، العربية (31 ألف)، الألمانية (27 ألف)، السويدية (15 ألف). مستخرجة من المدوّنة المتوازية المفتوحة.">Searchable phrasebooks: English (58k), Arabic (31k), German (27k), Swedish (15k). From the open parallel corpus.</p></div>
<div class="entry"><div class="title-line"><span class="num">7</span><span class="name" data-en="Tigre–Arabic Dictionary — Mohammed Ahmed" data-ar="قاموس تِجري–عربي — محمد أحمد">Tigre–Arabic Dictionary — Mohammed Ahmed</span><span class="tag print" data-en="In Progress" data-ar="قيد الإنجاز">In Progress</span></div>
<p data-en="A Tigre–Arabic dictionary compiled in Kassala, Sudan. A community reference resource." data-ar="قاموس تِجري–عربي جُمِّع في كسلا بالسودان. مرجع مجتمعي.">A Tigre–Arabic dictionary compiled in Kassala, Sudan. A community reference resource.</p></div>
</div>
</section>
<section id="web">
<!-- data-en/data-ar are copied into the heading as plain text by the language toggle, so the ampersand is escaped once only -->
<div class="sec-head"><h2 data-en="Encyclopedic &amp; Web" data-ar="الموسوعات والويب">Encyclopedic &amp; Web</h2><span class="rule"></span></div>
<div class="grid">
<div class="entry"><div class="title-line"><span class="num">8</span><a class="name" href="https://tig.wikipedia.org/" target="_blank" rel="noopener" data-en="Tigre Wikipedia (tig.wikipedia.org)" data-ar="ويكيبيديا التِّجري (tig.wikipedia.org)">Tigre Wikipedia (tig.wikipedia.org)</a></div>
<p data-en="The Tigre-language edition of Wikipedia in Ge'ez script. A growing source of encyclopedic text." data-ar="نسخة ويكيبيديا باللغة التِّجري بالخط الجعزي. مصدر متنامٍ للنصوص الموسوعية.">The Tigre-language edition of Wikipedia in Ge'ez script. A growing source of encyclopedic text.</p></div>
</div>
</section>
<section id="tools">
<!-- data-en/data-ar are copied into the heading as plain text by the language toggle, so the ampersand is escaped once only -->
<div class="sec-head"><h2 data-en="Tools &amp; Software" data-ar="الأدوات والبرمجيات">Tools &amp; Software</h2><span class="rule"></span></div>
<div class="grid">
<div class="entry"><div class="title-line"><span class="num">9</span><a class="name" href="https://github.com/mssrchapelier/tigre-parser" target="_blank" rel="noopener" data-en="TigreParser" data-ar="TigreParser">TigreParser</a></div>
<p data-en="Rule-based morphological analyzer for Tigre in Java. From a Moscow State University thesis (2019, refactored 2020)." data-ar="محلّل صرفي قائم على القواعد للغة التِّجري بلغة Java. من أطروحة بجامعة موسكو الحكومية (2019، أُعيدت هيكلته 2020).">Rule-based morphological analyzer for Tigre in Java. From a Moscow State University thesis (2019, refactored 2020).</p></div>
<div class="entry"><div class="title-line"><span class="num">10</span><a class="name" href="https://github.com/hltdi/HornMorpho" target="_blank" rel="noopener" data-en="HornMorpho" data-ar="HornMorpho">HornMorpho</a></div>
<p data-en="Python rule-based morphological analyzer/generator. Supports Tigre alongside Amharic, Oromo, and Tigrinya." data-ar="محلّل/مولّد صرفي قائم على القواعد بلغة Python. يدعم التِّجري إلى جانب الأمهرية والأورومو والتجرينية.">Python rule-based morphological analyzer/generator. Supports Tigre alongside Amharic, Oromo, and Tigrinya.</p></div>
</div>
</section>
<section id="research">
<!-- data-en/data-ar are copied into the heading as plain text by the language toggle, so the ampersand is escaped once only -->
<div class="sec-head"><h2 data-en="Research &amp; Partnerships" data-ar="الأبحاث والشراكات">Research &amp; Partnerships</h2><span class="rule"></span></div>
<div class="grid">
<!-- Entry 11 has no link, so its title is a span.name; data-en is escaped once only because the language toggle inserts it as plain text -->
<div class="entry"><div class="title-line"><span class="num">11</span><span class="name" data-en="TigMM Corpus &amp; Model Suite" data-ar="حزمة مدوّنة ونماذج TigMM">TigMM Corpus &amp; Model Suite</span></div>
<p data-en="Paper by Beshir Ibrahim. Reports BLEU 33.09 / chrF++ 40.91 for MT and low ASR word error rates." data-ar="بحث بقلم بشير إبراهيم. يسجّل BLEU 33.09 / chrF++ 40.91 للترجمة الآلية ومعدلات خطأ منخفضة في التعرف على الكلام.">Paper by Beshir Ibrahim. Reports BLEU 33.09 / chrF++ 40.91 for MT and low ASR word error rates.</p></div>
<div class="entry"><div class="title-line"><span class="num">12</span><a class="name" href="https://bouquet.metademolab.com/translate" target="_blank" rel="noopener" data-en="Bouquet (Meta) — Tigre" data-ar="Bouquet (Meta) — التِّجري">Bouquet (Meta) — Tigre</a></div>
<p data-en="Meta AI platform for crowdsourced paragraph translation into Tigre (Ge'ez). Collects human reference translations." data-ar="منصة Meta AI لترجمة الفقرات إلى التِّجري (بالخط الجعزي) بمساهمة المجتمع. تجمع ترجمات مرجعية بشرية.">Meta AI platform for crowdsourced paragraph translation into Tigre (Ge'ez). Collects human reference translations.</p></div>
<div class="entry"><div class="title-line"><span class="num">13</span><a class="name" href="https://about.fb.com/news/2025/02/announcing-language-technology-partner-program/" target="_blank" rel="noopener" data-en="UNESCO × Meta Partner Program" data-ar="برنامج شراكة اليونسكو × Meta">UNESCO × Meta Partner Program</a></div>
<p data-en="Initiative supporting language tech for under-resourced languages. The project participates as an official partner." data-ar="مبادرة لدعم تقنيات اللغة للّغات محدودة الموارد. يشارك المشروع كشريك رسمي.">Initiative supporting language tech for under-resourced languages. The project participates as an official partner.</p></div>
</div>
</section>
<section id="project">
<div class="sec-head"><h2 data-en="BeitTigreAI Project" data-ar="مشروع بيت تِجري">BeitTigreAI Project</h2><span class="rule"></span></div>
<div class="grid">
<div class="entry"><div class="title-line"><span class="num">14</span><a class="name" href="https://beittigreai-resources.static.hf.space/" target="_blank" rel="noopener" data-en="BeitTigreAI — project site" data-ar="بيت تِجري — موقع المشروع">BeitTigreAI — project site</a></div>
<p data-en="Home of the BeitTigreAI effort on NLP for Tigre and Ge'ez, covering data and ML across MT, ASR, TTS, and LLMs." data-ar="موطن جهود بيت تِجري في معالجة اللغة الطبيعية للتِّجري والجعزية، ويغطي البيانات والتعلم الآلي عبر الترجمة الآلية والتعرف على الكلام وتحويل النص إلى كلام والنماذج اللغوية الكبيرة.">Home of the BeitTigreAI effort on NLP for Tigre and Ge'ez, covering data and ML across MT, ASR, TTS, and LLMs.</p></div>
<div class="entry"><div class="title-line"><span class="num">15</span><a class="name" href="https://huggingface.co/BeitTigreAI" target="_blank" rel="noopener" data-en="BeitTigreAI on Hugging Face" data-ar="بيت تِجري على Hugging Face">BeitTigreAI on Hugging Face</a></div>
<p data-en="The organization's hub of models and datasets — central access point for the artifacts listed below." data-ar="مركز المنظمة للنماذج ومجموعات البيانات — نقطة الوصول المركزية للعناصر المذكورة أدناه.">The organization's hub of models and datasets — central access point for the artifacts listed below.</p></div>
</div>
</section>
<section id="models">
<div class="sec-head"><h2 data-en="BeitTigreAI Models · Hugging Face" data-ar="نماذج بيت تِجري · Hugging Face">BeitTigreAI Models · Hugging Face</h2><span class="rule"></span></div>
<div class="grid">
<div class="entry"><div class="title-line"><span class="num">16</span><a class="name" href="https://huggingface.co/BeitTigreAI/gemma-4-E2B-sft-tran-tigre" target="_blank" rel="noopener" data-en="gemma-4-E2B-sft-tran-tigre" data-ar="gemma-4-E2B-sft-tran-tigre">gemma-4-E2B-sft-tran-tigre</a></div>
<p data-en="Gemma-4 E2B (5B) SFT for text generation. Tuned for translation tasks into Tigre." data-ar="نموذج Gemma-4 E2B (5B) مضبوط بالإشراف لتوليد النص. مُحسّن لمهام الترجمة إلى التِّجري.">Gemma-4 E2B (5B) SFT for text generation. Tuned for translation tasks into Tigre.</p></div>
<div class="entry"><div class="title-line"><span class="num">17</span><a class="name" href="https://huggingface.co/BeitTigreAI/tigre-llm-gemma4" target="_blank" rel="noopener" data-en="tigre-llm-gemma4" data-ar="tigre-llm-gemma4">tigre-llm-gemma4</a></div>
<p data-en="An 8B Tigre LLM built on Gemma 4. General generation and instruction following in Tigre." data-ar="نموذج لغوي كبير للتِّجري بحجم 8B مبني على Gemma 4. للتوليد العام واتباع التعليمات بالتِّجري.">An 8B Tigre LLM built on Gemma 4. General generation and instruction following in Tigre.</p></div>
<div class="entry"><div class="title-line"><span class="num">18</span><a class="name" href="https://huggingface.co/BeitTigreAI/tigre-sonar-encoder" target="_blank" rel="noopener" data-en="tigre-sonar-encoder" data-ar="tigre-sonar-encoder">tigre-sonar-encoder</a></div>
<p data-en="SONAR-based multilingual sentence encoder for Tigre. Produces embeddings for retrieval and cross-lingual tasks." data-ar="مُرمِّز جُمل متعدد اللغات قائم على SONAR للتِّجري. يُنتج تمثيلات متجهة للاسترجاع والمهام عبر اللغات.">SONAR-based multilingual sentence encoder for Tigre. Produces embeddings for retrieval and cross-lingual tasks.</p></div>
<div class="entry"><div class="title-line"><span class="num">19</span><a class="name" href="https://huggingface.co/BeitTigreAI/tigre-asr-omniASR_CTC_3B" target="_blank" rel="noopener" data-en="tigre-asr-omniASR_CTC_3B" data-ar="tigre-asr-omniASR_CTC_3B">tigre-asr-omniASR_CTC_3B</a></div>
<p data-en="A 3B OmniASR CTC speech-recognition model. Transcribes spoken Tigre audio into text." data-ar="نموذج تعرّف على الكلام OmniASR CTC بحجم 3B. يحوّل الصوت المنطوق بالتِّجري إلى نص.">A 3B OmniASR CTC speech-recognition model. Transcribes spoken Tigre audio into text.</p></div>
<div class="entry"><div class="title-line"><span class="num">20</span><a class="name" href="https://huggingface.co/BeitTigreAI/tigre-asr-Wav2Vec2Bert" target="_blank" rel="noopener" data-en="tigre-asr-Wav2Vec2Bert" data-ar="tigre-asr-Wav2Vec2Bert">tigre-asr-Wav2Vec2Bert</a></div>
<p data-en="A 0.6B Wav2Vec2-BERT ASR model for accurate Tigre audio transcription." data-ar="نموذج تعرّف على الكلام Wav2Vec2-BERT بحجم 0.6B لنسخ صوت التِّجري بدقة.">A 0.6B Wav2Vec2-BERT ASR model for accurate Tigre audio transcription.</p></div>
<div class="entry"><div class="title-line"><span class="num">21</span><a class="name" href="https://huggingface.co/BeitTigreAI/tigre-llm-Llama3.2-1B" target="_blank" rel="noopener" data-en="tigre-llm-Llama3.2-1B" data-ar="tigre-llm-Llama3.2-1B">tigre-llm-Llama3.2-1B</a></div>
<p data-en="A compact 1B Tigre LLM based on Llama 3.2. Lightweight text generation option." data-ar="نموذج لغوي مدمج للتِّجري بحجم 1B مبني على Llama 3.2. خيار خفيف لتوليد النص.">A compact 1B Tigre LLM based on Llama 3.2. Lightweight text generation option.</p></div>
<div class="entry"><div class="title-line"><span class="num">22</span><a class="name" href="https://huggingface.co/BeitTigreAI/tigre-xlm-roberta-base" target="_blank" rel="noopener" data-en="tigre-xlm-roberta-base" data-ar="tigre-xlm-roberta-base">tigre-xlm-roberta-base</a></div>
<p data-en="XLM-RoBERTa base adapted for Tigre. For classification, tagging, and language understanding." data-ar="نموذج XLM-RoBERTa الأساسي مُكيَّف للتِّجري. للتصنيف ووسم الكلمات وفهم اللغة.">XLM-RoBERTa base adapted for Tigre. For classification, tagging, and language understanding.</p></div>
<div class="entry"><div class="title-line"><span class="num">23</span><a class="name" href="https://huggingface.co/BeitTigreAI/tigre-nllb-200-3.3B" target="_blank" rel="noopener" data-en="tigre-nllb-200-3.3B" data-ar="tigre-nllb-200-3.3B">tigre-nllb-200-3.3B</a></div>
<p data-en="A 3.3B NLLB-200 MT model covering Tigre. High-quality translation to and from other languages." data-ar="نموذج ترجمة آلية NLLB-200 بحجم 3.3B يشمل التِّجري. ترجمة عالية الجودة من وإلى لغات أخرى.">A 3.3B NLLB-200 MT model covering Tigre. High-quality translation to and from other languages.</p></div>
<div class="entry"><div class="title-line"><span class="num">24</span><a class="name" href="https://huggingface.co/BeitTigreAI/tigre-nllb-200-distilled-600M" target="_blank" rel="noopener" data-en="tigre-nllb-200-distilled-600M" data-ar="tigre-nllb-200-distilled-600M">tigre-nllb-200-distilled-600M</a></div>
<p data-en="Distilled 600M NLLB-200 MT model. Smaller, faster variant for constrained deployment." data-ar="نموذج ترجمة NLLB-200 مُقطَّر بحجم 600M. نسخة أصغر وأسرع للنشر المحدود الموارد.">Distilled 600M NLLB-200 MT model. Smaller, faster variant for constrained deployment.</p></div>
</div>
</section>
<section id="hfdata">
<div class="sec-head"><h2 data-en="BeitTigreAI Datasets · Hugging Face" data-ar="مجموعات بيانات بيت تِجري · Hugging Face">BeitTigreAI Datasets · Hugging Face</h2><span class="rule"></span></div>
<div class="grid">
<div class="entry"><div class="title-line"><span class="num">25</span><a class="name" href="https://huggingface.co/datasets/BeitTigreAI/tigre-tts-training" target="_blank" rel="noopener" data-en="tigre-tts-training" data-ar="tigre-tts-training">tigre-tts-training</a></div>
<p data-en="Training data for Tigre text-to-speech. Large paired audio/text resource (~6.77k rows, viewer enabled)." data-ar="بيانات تدريب لتحويل نص التِّجري إلى كلام. مورد كبير من الصوت/النص المقترن (نحو 6.77 ألف صف، مع عارض).">Training data for Tigre text-to-speech. Large paired audio/text resource (~6.77k rows, viewer enabled).</p></div>
<div class="entry"><div class="title-line"><span class="num">26</span><a class="name" href="https://huggingface.co/datasets/BeitTigreAI/tigre-tts-model" target="_blank" rel="noopener" data-en="tigre-tts-model" data-ar="tigre-tts-model">tigre-tts-model</a></div>
<p data-en="Assets and data supporting a Tigre TTS model. Companion to the TTS training set." data-ar="أصول وبيانات تدعم نموذج تحويل النص إلى كلام للتِّجري. مرافق لمجموعة تدريب TTS.">Assets and data supporting a Tigre TTS model. Companion to the TTS training set.</p></div>
<div class="entry"><div class="title-line"><span class="num">27</span><a class="name" href="https://huggingface.co/datasets/BeitTigreAI/tigre-data-fasttext" target="_blank" rel="noopener" data-en="tigre-data-fasttext" data-ar="tigre-data-fasttext">tigre-data-fasttext</a></div>
<p data-en="Data for FastText word embeddings in Tigre. For lightweight vectorization and language ID." data-ar="بيانات لتمثيلات كلمات FastText بالتِّجري. للتمثيل المتجهي الخفيف وتحديد اللغة.">Data for FastText word embeddings in Tigre. For lightweight vectorization and language ID.</p></div>
<div class="entry"><div class="title-line"><span class="num">28</span><a class="name" href="https://huggingface.co/datasets/BeitTigreAI/tigre-data-speech-audio" target="_blank" rel="noopener" data-en="tigre-data-speech-audio" data-ar="tigre-data-speech-audio">tigre-data-speech-audio</a></div>
<p data-en="A collection of Tigre speech audio recordings. Raw audio for ASR and TTS development." data-ar="مجموعة من التسجيلات الصوتية بالتِّجري. صوت خام لتطوير التعرف على الكلام وتحويل النص إلى كلام.">A collection of Tigre speech audio recordings. Raw audio for ASR and TTS development.</p></div>
<div class="entry"><div class="title-line"><span class="num">29</span><a class="name" href="https://huggingface.co/datasets/BeitTigreAI/tigre-speech-text-aligned" target="_blank" rel="noopener" data-en="tigre-speech-text-aligned" data-ar="tigre-speech-text-aligned">tigre-speech-text-aligned</a></div>
<p data-en="Speech recordings aligned with text transcripts. Suitable for training and evaluating ASR models." data-ar="تسجيلات صوتية محاذاة مع نصوصها. مناسبة لتدريب وتقييم نماذج التعرف على الكلام.">Speech recordings aligned with text transcripts. Suitable for training and evaluating ASR models.</p></div>
<div class="entry"><div class="title-line"><span class="num">30</span><a class="name" href="https://huggingface.co/datasets/BeitTigreAI/tigre-data-parallel-multilingual" target="_blank" rel="noopener" data-en="tigre-data-parallel-multilingual" data-ar="tigre-data-parallel-multilingual">tigre-data-parallel-multilingual</a></div>
<p data-en="A parallel multilingual corpus including Tigre (~330k rows). For training and evaluating MT models." data-ar="مدوّنة متوازية متعددة اللغات تشمل التِّجري (نحو 330 ألف صف). لتدريب وتقييم نماذج الترجمة الآلية.">A parallel multilingual corpus including Tigre (~330k rows). For training and evaluating MT models.</p></div>
<div class="entry"><div class="title-line"><span class="num">31</span><a class="name" href="https://huggingface.co/datasets/BeitTigreAI/tigre-data-wikipedia" target="_blank" rel="noopener" data-en="tigre-data-wikipedia" data-ar="tigre-data-wikipedia">tigre-data-wikipedia</a></div>
<p data-en="Tigre text extracted from Wikipedia. A monolingual corpus for pretraining and language modeling." data-ar="نص بالتِّجري مستخرج من ويكيبيديا. مدوّنة أحادية اللغة للتدريب المسبق والنمذجة اللغوية.">Tigre text extracted from Wikipedia. A monolingual corpus for pretraining and language modeling.</p></div>
<div class="entry"><div class="title-line"><span class="num">32</span><a class="name" href="https://huggingface.co/datasets/BeitTigreAI/tigre-data-lexicon" target="_blank" rel="noopener" data-en="tigre-data-lexicon" data-ar="tigre-data-lexicon">tigre-data-lexicon</a></div>
<p data-en="A Tigre lexicon dataset (~420k entries, viewer enabled). Vocabulary resource for NLP and linguistic work." data-ar="مجموعة بيانات معجم التِّجري (نحو 420 ألف مدخل، مع عارض). مورد مفردات لمعالجة اللغة والعمل اللغوي.">A Tigre lexicon dataset (~420k entries, viewer enabled). Vocabulary resource for NLP and linguistic work.</p></div>
<div class="entry"><div class="title-line"><span class="num">33</span><a class="name" href="https://huggingface.co/datasets/BeitTigreAI/tigre-data-kenLM" target="_blank" rel="noopener" data-en="tigre-data-kenLM" data-ar="tigre-data-kenLM">tigre-data-kenLM</a></div>
<p data-en="Data for building KenLM n-gram language models. Often used to rescore ASR and translation output." data-ar="بيانات لبناء نماذج لغوية KenLM من نوع n-gram. تُستخدم غالبًا لإعادة ترتيب مخرجات التعرف على الكلام والترجمة.">Data for building KenLM n-gram language models. Often used to rescore ASR and translation output.</p></div>
<div class="entry"><div class="title-line"><span class="num">34</span><a class="name" href="https://huggingface.co/datasets/BeitTigreAI/tigre-data-monolingual-text" target="_blank" rel="noopener" data-en="tigre-data-monolingual-text" data-ar="tigre-data-monolingual-text">tigre-data-monolingual-text</a></div>
<p data-en="A monolingual Tigre text corpus (preview). General-purpose data for language modeling and pretraining." data-ar="مدوّنة نصية أحادية اللغة بالتِّجري (معاينة). بيانات عامة الغرض للنمذجة اللغوية والتدريب المسبق.">A monolingual Tigre text corpus (preview). General-purpose data for language modeling and pretraining.</p></div>
<!-- Disabled entry: tigre-data-dictionary (uncomment this block to list it as item 35).
     The closing tag of the surrounding grid sits after this comment so the grid is always closed.
<div class="entry"><div class="title-line"><span class="num">35</span><a class="name" href="https://huggingface.co/datasets/BeitTigreAI/tigre-data-dictionary" target="_blank" rel="noopener" data-en="tigre-data-dictionary" data-ar="tigre-data-dictionary">tigre-data-dictionary</a></div>
<p data-en="A structured Tigre dictionary dataset. Reference resource for definitions and word-level information." data-ar="مجموعة بيانات قاموس تِجري منظم. مورد مرجعي للتعريفات والمعلومات على مستوى الكلمة.">A structured Tigre dictionary dataset. Reference resource for definitions and word-level information.</p></div>
-->
</div>
</section>
</div>
<script>
/*
 * Language toggle (English <-> Arabic).
 * Translatable elements carry their two texts in data-en / data-ar; the
 * #langToggle button flips between them. The button is always labelled in the
 * language it switches TO, so its own lang attribute is kept opposite to the
 * page language.
 */
(function(){
  /**
   * Switch the page to the given language.
   * @param {string} lang  'ar' for Arabic; any other value is treated as left-to-right
   *                       (the click handler below only ever passes 'en' or 'ar').
   * Effects: sets lang and dir on <html>, toggles the `ar` class on <body>,
   * replaces the text of every [data-en] element with its data-<lang> value
   * (elements lacking that attribute are left unchanged), and relabels the
   * toggle button.
   */
  function setLang(lang){
    var isArabic = (lang === 'ar');
    var root = document.documentElement;
    root.lang = lang;
    root.dir = isArabic ? 'rtl' : 'ltr';
    document.body.classList.toggle('ar', isArabic);
    document.querySelectorAll('[data-en]').forEach(function(el){
      var text = el.getAttribute('data-' + lang);
      // textContent inserts the value as plain text, never as markup
      if (text !== null) el.textContent = text;
    });
    var btn = document.getElementById('langToggle');
    // The label and aria-label are written in the other language; mark them as such
    btn.lang = isArabic ? 'en' : 'ar';
    btn.textContent = isArabic ? 'English' : 'العربية';
    btn.setAttribute('aria-label', isArabic ? 'Switch to English' : 'التبديل إلى العربية');
  }

  var toggle = document.getElementById('langToggle');
  // The initial markup labels the button in Arabic while the page itself is English
  toggle.lang = 'ar';
  toggle.addEventListener('click', function(){
    // Anything other than 'ar' on <html lang> counts as English
    var current = (document.documentElement.lang === 'ar') ? 'ar' : 'en';
    setLang(current === 'ar' ? 'en' : 'ar');
  });
})();
</script>
</body>
</html>