{
"dataset_name": "TEXT-AUTH-Eval",
"version": "1.0",
"total_samples": 7502,
"human_samples": 781,
"ai_samples": 781,
"challenge_samples": {
"paraphrased": 2500,
"cross_model": 3440
},
"domains": [
"general",
"academic",
"creative",
"ai_ml",
"software_dev",
"technical_doc",
"engineering",
"science",
"business",
"legal",
"medical",
"journalism",
"marketing",
"social_media",
"blog_personal",
"tutorial"
],
"human_sources": {
"general": "Wikipedia",
"academic": "scientific_papers (arXiv abstracts)",
"creative": "Project Gutenberg / C4 filtered",
"ai_ml": "scientific_papers (arXiv with ML keywords)",
"software_dev": "C4 filtered (code/documentation keywords)",
"technical_doc": "C4 filtered (documentation keywords)",
"engineering": "scientific_papers (arXiv engineering)",
"science": "C4 filtered (scientific keywords)",
"business": "C4 filtered (business/financial keywords)",
"legal": "lex_glue / C4 filtered (legal keywords)",
"medical": "scientific_papers (PubMed abstracts)",
"journalism": "cnn_dailymail",
"marketing": "C4 filtered (marketing keywords)",
"social_media": "tweet_eval / C4 filtered (social keywords)",
"blog_personal": "C4 filtered (personal narrative keywords)",
"tutorial": "C4 filtered (tutorial/guide keywords)"
},
"ai_generation": {
"primary_model": "mistral:7b (via Ollama)",
"cross_model": "llama3:8b (via Ollama)",
"paraphrasing": "mistral:7b (via Ollama instruction-based rephrasing)"
},
"notes": [
"All AI-generated texts produced using local Ollama models to avoid Hugging Face downloads",
"Paraphrased set created by instructing mistral:7b to rephrase original AI texts",
"Cross-model set generated using llama3:8b (unseen during primary AI generation)",
"Human texts sourced exclusively from public, auto-downloadable datasets"
],
"license": "CC BY / Public Domain / Fair Use \u2014 for research only",
"created": "2025",
"compatible_with": "TEXT-AUTH v1.0.0"
}