DialectAnalysis / samples.jsonl
thomascerniglia's picture
Upload 8 files
05b9702 verified
# Minimal development set of short, public-domain-style snippets; meant for iterative tuning, not for evaluation.
# label: one of Attic/Ionic/Doric/Aeolic/Koine
{"id":"attic_tt_1","label":"Attic","text":"ἡ θάλαττα καὶ ἡ γλῶττα· πράττω ἃ δεῖ.","strip_diacritics":true}
{"id":"ionic_ss_1","label":"Ionic","text":"θάλασσα γλῶσσα τάσσω.","strip_diacritics":true}
{"id":"koine_fn_1","label":"Koine","text":"ἵνα γνῶτε ὅτι εἰσῆλθεν εἰς τὸν οἶκον.","strip_diacritics":true}
{"id":"aeolic_inf_1","label":"Aeolic","text":"βουλομαι ποιεμεναι ταδε.","strip_diacritics":true}
{"id":"doric_inf_1","label":"Doric","text":"βουλομαι ποιεμεν ταδε.","strip_diacritics":true}
# Epic/Ionic-like marker example (very small):
{"id":"ionic_epic_1","label":"Ionic","text":"Ἠελίοιο φαεινοῦ.","strip_diacritics":true}
# Homeric epic (Ionic epic language; short excerpts)
{"id":"homer_od_1","label":"Ionic","text":"Ἄνδρα μοι ἔννεπε, Μοῦσα, πολύτροπον, ὃς μάλα πολλὰ πλάγχθη.","strip_diacritics":true}
{"id":"homer_il_1","label":"Ionic","text":"Μῆνιν ἄειδε, θεά, Πηληϊάδεω Ἀχιλῆος.","strip_diacritics":true}
# Attic tragedy (Aeschylus; short excerpts)
{"id":"aesch_1","label":"Attic","text":"πρῶτον μὲν εὐχῇ τῇδε πρεσβεύω θεῶν.","strip_diacritics":true}
{"id":"aesch_2","label":"Attic","text":"ἐς τήνδε γαῖαν ἦλθε Παρνησοῦ θ᾽ ἕδρας.","strip_diacritics":true}
# Aeolic lyric-style markers (short, with pronoun)
{"id":"aeolic_pron_1","label":"Aeolic","text":"αμμι δ᾽ ἄνασσα.","strip_diacritics":true}
# Doric choral-like morphology (very small; synthetic but diagnostic)
{"id":"doric_1pl_mes_1","label":"Doric","text":"λεγομες ταδε.","strip_diacritics":true}
# Koine (NT-style; short)
{"id":"koine_nt_1","label":"Koine","text":"καὶ ἐγένετο ἐν ταῖς ἡμέραις ἐκείναις.","strip_diacritics":true}