raghub-sevima-test / scripts /rag_eval_pdfs.json
lifedebugger's picture
Deploy files from GitHub repository with LFS
652132b
{
"course_id": "course-eval-five-pdfs-001",
"documents": [
{
"path": "doc/samples/Dokumen Uji RAG Hub Sevima.pdf"
},
{
"path": "doc/samples/Dokumen Uji_RAG_SEVIMA_Kebijakan_Akademik.pdf"
},
{
"path": "doc/samples/Sop Implementasi Tenant Rag Hub Sevima.pdf"
},
{
"path": "doc/samples/Dokumen Rag Mata Kuliah Pemrograman Web Struktur Data Dasar Pemrograman.pdf"
},
{
"path": "doc/samples/Transformer Architecture.pdf"
}
],
"questions": [
{
"question": "Apa nama platform yang dijelaskan dalam dokumen spesifikasi RAG Hub SEVIMA?",
"expected_all": [
"RAG Hub SEVIMA"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa versi dokumen spesifikasi dan operasional RAG Hub SEVIMA?",
"expected_all": [
"1.4.2"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Kapan tanggal rilis dokumen RAG Hub SEVIMA?",
"expected_all": [
"17 April 2026"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Siapa pemilik dokumen RAG Hub SEVIMA?",
"expected_all": [
"Tim Platform Knowledge SEVIMA"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa email kontak eskalasi untuk RAG Hub SEVIMA?",
"expected_all": [
"rag-platform@sevima.test"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Berapa target keberhasilan fase pertama RAG Hub dalam menjawab pertanyaan uji berbasis dokumen?",
"expected_all": [
"78%"
],
"expected_any": [
"minimal"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Sebutkan tiga kelompok pengguna utama pada fase pertama RAG Hub.",
"expected_all": [
"Tim Implementasi Kampus",
"Tim Customer Support",
"Tim Product dan Engineering"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa vector database utama pada RAG Hub versi 1.4.2?",
"expected_all": [
"Qdrant"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa keyword search yang digunakan RAG Hub?",
"expected_all": [
"PostgreSQL Full-Text Search"
],
"expected_any": [
"PostgreSQL"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa ukuran maksimum file yang dijelaskan pada batasan file RAG Hub?",
"expected_all": [
"40 MB"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Berapa jumlah halaman maksimum per dokumen PDF pada RAG Hub?",
"expected_all": [
"300"
],
"expected_any": [
"halaman"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Kapan registrasi ulang mahasiswa wajib dilakukan?",
"expected_all": [
"awal semester ganjil dan genap"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa akibat jika mahasiswa gagal melakukan registrasi ulang?",
"expected_all": [
"non-aktif"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Kapan batas maksimal pembayaran UKT untuk semester ganjil dan genap?",
"expected_all": [
"minggu kedua",
"Agustus",
"Februari"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa syarat mutlak sebelum perkuliahan dimulai terkait KRS?",
"expected_all": [
"Validasi KRS",
"Dosen Wali"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Berapa rentang nilai angka untuk nilai huruf AB?",
"expected_all": [
"76",
"85"
],
"expected_any": [
"AB"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Berapa bobot SKS untuk nilai huruf C?",
"expected_all": [
"2.00"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Berapa biaya administrasi cuti akademik?",
"expected_all": [
"25%",
"UKT"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Berapa durasi maksimal cuti akademik yang diizinkan?",
"expected_all": [
"4 semester"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa system_id pada blok metadata API referensi?",
"expected_all": [
"SIAKAD-CORE-099"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa data_domain pada blok metadata API referensi?",
"expected_all": [
"academic_policies"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa last_sync pada blok metadata API referensi?",
"expected_all": [
"2026-05-01T08:00:00Z"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Siapa nama rektor yang menandatangani dokumen ini?",
"expected_all": [],
"expected_any": [],
"forbidden": [],
"expect_abstain": true
},
{
"question": "Berapa nomor telepon helpdesk yang tercantum dalam dokumen?",
"expected_all": [],
"expected_any": [],
"forbidden": [],
"expect_abstain": true
},
{
"question": "Apa alamat kantor fisik SEVIMA yang disebutkan dalam dokumen?",
"expected_all": [],
"expected_any": [],
"forbidden": [],
"expect_abstain": true
},
{
"question": "Apa versi SOP Implementasi Tenant RAG Hub SEVIMA?",
"expected_all": [
"2.3.0"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Kapan tanggal efektif SOP Implementasi Tenant RAG Hub SEVIMA?",
"expected_all": [
"28 April 2026"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Siapa pemilik dokumen SOP Implementasi Tenant RAG Hub SEVIMA?",
"expected_all": [
"Tim Implementation Enablement SEVIMA"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa kontak eskalasi untuk SOP Implementasi Tenant RAG Hub SEVIMA?",
"expected_all": [
"implementation-enablement@sevima.test"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Berapa target durasi normal implementasi tenant baru sejak kickoff?",
"expected_all": [
"15 hari kerja"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Kapan durasi implementasi tenant dapat diperpanjang menjadi 25 hari kerja?",
"expected_all": [
"lebih dari 500 dokumen"
],
"expected_any": [
"25 hari kerja"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Kickoff implementasi dilakukan maksimal berapa hari kerja setelah kontrak aktif?",
"expected_all": [
"3 hari kerja"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa format Tenant ID yang benar menurut SOP implementasi tenant?",
"expected_all": [
"T-<KODE-INSTITUSI>-<NOMOR-URUT>"
],
"expected_any": [
"T-UCD-001"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa nilai wajib untuk tenant_isolation_mode?",
"expected_all": [
"strict"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa nilai default no_answer_threshold pada konfigurasi wajib tenant?",
"expected_all": [
"0.42"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apakah Campus Support boleh menghapus dokumen atau mengubah metadata?",
"expected_all": [
"tidak boleh"
],
"expected_any": [
"Campus Support"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Kapan ingestion batch pertama boleh dilakukan?",
"expected_all": [
"80%",
"dokumen P0"
],
"expected_any": [
"lolos validasi"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Berapa persen sampel chunk dari batch pertama yang wajib diperiksa SEVIMA Knowledge Admin?",
"expected_all": [
"10%"
],
"expected_any": [
"sampel chunk"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Kapan batch ingestion dianggap gagal berdasarkan sampel chunk?",
"expected_all": [
"12%",
"masalah struktur berat"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Berapa jumlah minimal pertanyaan uji tenant sebelum go-live?",
"expected_all": [
"40"
],
"expected_any": [
"pertanyaan uji"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Sebutkan ambang kelulusan Answer Accuracy, Citation Correctness, No-Answer Precision, dan Access Control Accuracy sebelum go-live.",
"expected_all": [
"80%",
"88%",
"90%",
"100%"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Berapa lama hypercare berlangsung setelah tenant aktif?",
"expected_all": [
"10 hari kerja"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Berapa durasi minimal pelatihan Campus Admin sebelum go-live?",
"expected_all": [
"90 menit"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa syarat Support Lead tidak boleh menerima handover?",
"expected_all": [
"issue P1 terbuka"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apakah role Security Reviewer boleh digabung dengan Knowledge Admin Kampus?",
"expected_all": [
"tidak boleh"
],
"expected_any": [
"Security Reviewer"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa status yang harus diberikan pada dokumen lama yang digantikan dokumen baru tetapi masih relevan historis?",
"expected_all": [
"Deprecated"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa kode dan bobot SKS mata kuliah Dasar Pemrograman?",
"expected_all": [
"IF101",
"4 SKS"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa prasyarat mata kuliah Struktur Data?",
"expected_all": [
"Dasar Pemrograman"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa kode dan bobot SKS mata kuliah Pemrograman Web?",
"expected_all": [
"IF305",
"3 SKS"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa prasyarat mata kuliah Pemrograman Web?",
"expected_all": [
"Dasar Pemrograman",
"Basis Data"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Pada minggu ke berapa Binary Search Tree diajarkan dalam mata kuliah Struktur Data?",
"expected_all": [
"10",
"Binary Search Tree"
],
"expected_any": [
"minggu"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Berapa bobot final project Pemrograman Web?",
"expected_all": [
"35%"
],
"expected_any": [
"Final project"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Sebutkan fitur wajib final project Pemrograman Web.",
"expected_all": [
"CRUD",
"Autentikasi pengguna",
"Session login dan logout",
"Validasi input",
"database relasional"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Berapa minimal kehadiran untuk mengikuti ujian akhir pada ketiga mata kuliah?",
"expected_all": [
"75%"
],
"expected_any": [
"kehadiran"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa aturan keterlambatan pengumpulan tugas?",
"expected_all": [
"10% per hari"
],
"expected_any": [
"lebih dari 3 hari",
"ditolak"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apakah password boleh disimpan plaintext pada proyek web?",
"expected_all": [
"plaintext"
],
"expected_any": [
"tidak boleh",
"tidak"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Apa fokus utama mata kuliah Struktur Data?",
"expected_all": [
"penyimpanan",
"pengolahan data",
"efisien"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Penggunaan AI seperti apa yang diperbolehkan pada tugas pemrograman?",
"expected_all": [
"alat bantu belajar"
],
"expected_any": [
"Meminta penjelasan konsep",
"Membantu menemukan bug",
"Membantu menyusun dokumentasi awal"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Siapa nama CEO yang menyetujui SOP Implementasi Tenant RAG Hub SEVIMA?",
"expected_all": [],
"expected_any": [],
"forbidden": [],
"expect_abstain": true
},
{
"question": "Berapa nomor telepon darurat tim Implementation Enablement?",
"expected_all": [],
"expected_any": [],
"forbidden": [],
"expect_abstain": true
},
{
"question": "What are the document ID, version, and publication month for the Transformer Architecture technical reference?",
"expected_all": [
"AI-TR-2024-047",
"3.0",
"March 2024"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "What three compounding problems did sequential RNN processing create for long sequences?",
"expected_all": [
"Parallelism bottleneck",
"Vanishing/exploding gradients",
"Memory compression"
],
"expected_any": [],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Why does scaled dot-product attention divide by sqrt(dk)?",
"expected_all": [
"softmax"
],
"expected_any": [
"near-zero gradients",
"saturated regions",
"reasonable range"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "In the Transformer base model, how many attention heads are used and what is the per-head dimension?",
"expected_all": [
"8",
"64"
],
"expected_any": [
"heads",
"dimensions"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "Why has Pre-LN become dominant in large Transformer models?",
"expected_all": [
"stable gradients"
],
"expected_any": [
"eliminates the need for learning rate warmup",
"learning rate warmup"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "What positional encoding scheme does LLaMA use, and what does it do to query and key vectors?",
"expected_all": [
"RoPE",
"query",
"key"
],
"expected_any": [
"Rotary Positional Embeddings",
"rotating query and key vectors",
"complex space"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "How many parameters does BERT-Large have, and what pre-training objective does it use?",
"expected_all": [
"340M"
],
"expected_any": [
"Masked Language Modeling",
"MLM"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "In T5 span corruption, what is the average span length and what percentage of tokens are masked?",
"expected_all": [
"3",
"15%"
],
"expected_any": [
"span length",
"tokens"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "What is QLoRA and how much GPU memory does it need to fine-tune a 65B model compared with full fine-tuning?",
"expected_all": [
"4-bit",
"NF4",
"48 GB",
"780 GB"
],
"expected_any": [
"LoRA adapters",
"frozen base weights"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "For a 7B LLaMA model at sequence length 4096, how much KV cache memory is required per request?",
"expected_all": [
"2 GB",
"4096"
],
"expected_any": [
"KV cache"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "How does FlashAttention improve attention computation and what speedup does it achieve?",
"expected_all": [
"IO-aware",
"SRAM",
"speedup"
],
"expected_any": [
"2",
"4"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
},
{
"question": "How does speculative decoding use a draft model, and what throughput improvement can it achieve?",
"expected_all": [
"draft model",
"2",
"3"
],
"expected_any": [
"candidate tokens",
"verifies",
"single forward pass"
],
"forbidden": [
"i don't know"
],
"expect_abstain": false
}
]
}