{ "course_id": "course-eval-five-pdfs-001", "documents": [ { "path": "doc/samples/Dokumen Uji RAG Hub Sevima.pdf" }, { "path": "doc/samples/Dokumen Uji_RAG_SEVIMA_Kebijakan_Akademik.pdf" }, { "path": "doc/samples/Sop Implementasi Tenant Rag Hub Sevima.pdf" }, { "path": "doc/samples/Dokumen Rag Mata Kuliah Pemrograman Web Struktur Data Dasar Pemrograman.pdf" }, { "path": "doc/samples/Transformer Architecture.pdf" } ], "questions": [ { "question": "Apa nama platform yang dijelaskan dalam dokumen spesifikasi RAG Hub SEVIMA?", "expected_all": [ "RAG Hub SEVIMA" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa versi dokumen spesifikasi dan operasional RAG Hub SEVIMA?", "expected_all": [ "1.4.2" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Kapan tanggal rilis dokumen RAG Hub SEVIMA?", "expected_all": [ "17 April 2026" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Siapa pemilik dokumen RAG Hub SEVIMA?", "expected_all": [ "Tim Platform Knowledge SEVIMA" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa email kontak eskalasi untuk RAG Hub SEVIMA?", "expected_all": [ "rag-platform@sevima.test" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Berapa target keberhasilan fase pertama RAG Hub dalam menjawab pertanyaan uji berbasis dokumen?", "expected_all": [ "78%" ], "expected_any": [ "minimal" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Sebutkan tiga kelompok pengguna utama pada fase pertama RAG Hub.", "expected_all": [ "Tim Implementasi Kampus", "Tim Customer Support", "Tim Product dan Engineering" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa vector database utama pada RAG Hub versi 1.4.2?", "expected_all": [ "Qdrant" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa keyword search yang digunakan RAG Hub?", "expected_all": [ "PostgreSQL Full-Text Search" ], "expected_any": [ "PostgreSQL" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa ukuran maksimum file yang dijelaskan pada batasan file RAG Hub?", "expected_all": [ "40 MB" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Berapa jumlah halaman maksimum per dokumen PDF pada RAG Hub?", "expected_all": [ "300" ], "expected_any": [ "halaman" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Kapan registrasi ulang mahasiswa wajib dilakukan?", "expected_all": [ "awal semester ganjil dan genap" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa akibat jika mahasiswa gagal melakukan registrasi ulang?", "expected_all": [ "non-aktif" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Kapan batas maksimal pembayaran UKT untuk semester ganjil dan genap?", "expected_all": [ "minggu kedua", "Agustus", "Februari" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa syarat mutlak sebelum perkuliahan dimulai terkait KRS?", "expected_all": [ "Validasi KRS", "Dosen Wali" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Berapa rentang nilai angka untuk nilai huruf AB?", "expected_all": [ "76", "85" ], "expected_any": [ "AB" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Berapa bobot SKS untuk nilai huruf C?", "expected_all": [ "2.00" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Berapa biaya administrasi cuti akademik?", "expected_all": [ "25%", "UKT" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Berapa durasi maksimal cuti akademik yang diizinkan?", "expected_all": [ "4 semester" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa system_id pada blok metadata API referensi?", "expected_all": [ "SIAKAD-CORE-099" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa data_domain pada blok metadata API referensi?", "expected_all": [ "academic_policies" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa last_sync pada blok metadata API referensi?", "expected_all": [ "2026-05-01T08:00:00Z" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Siapa nama rektor yang menandatangani dokumen ini?", "expected_all": [], "expected_any": [], "forbidden": [], "expect_abstain": true }, { "question": "Berapa nomor telepon helpdesk yang tercantum dalam dokumen?", "expected_all": [], "expected_any": [], "forbidden": [], "expect_abstain": true }, { "question": "Apa alamat kantor fisik SEVIMA yang disebutkan dalam dokumen?", "expected_all": [], "expected_any": [], "forbidden": [], "expect_abstain": true }, { "question": "Apa versi SOP Implementasi Tenant RAG Hub SEVIMA?", "expected_all": [ "2.3.0" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Kapan tanggal efektif SOP Implementasi Tenant RAG Hub SEVIMA?", "expected_all": [ "28 April 2026" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Siapa pemilik dokumen SOP Implementasi Tenant RAG Hub SEVIMA?", "expected_all": [ "Tim Implementation Enablement SEVIMA" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa kontak eskalasi untuk SOP Implementasi Tenant RAG Hub SEVIMA?", "expected_all": [ "implementation-enablement@sevima.test" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Berapa target durasi normal implementasi tenant baru sejak kickoff?", "expected_all": [ "15 hari kerja" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Kapan durasi implementasi tenant dapat diperpanjang menjadi 25 hari kerja?", "expected_all": [ "lebih dari 500 dokumen" ], "expected_any": [ "25 hari kerja" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Kickoff implementasi dilakukan maksimal berapa hari kerja setelah kontrak aktif?", "expected_all": [ "3 hari kerja" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa format Tenant ID yang benar menurut SOP implementasi tenant?", "expected_all": [ "T--" ], "expected_any": [ "T-UCD-001" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa nilai wajib untuk tenant_isolation_mode?", "expected_all": [ "strict" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa nilai default no_answer_threshold pada konfigurasi wajib tenant?", "expected_all": [ "0.42" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apakah Campus Support boleh menghapus dokumen atau mengubah metadata?", "expected_all": [ "tidak boleh" ], "expected_any": [ "Campus Support" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Kapan ingestion batch pertama boleh dilakukan?", "expected_all": [ "80%", "dokumen P0" ], "expected_any": [ "lolos validasi" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Berapa persen sampel chunk dari batch pertama yang wajib diperiksa SEVIMA Knowledge Admin?", "expected_all": [ "10%" ], "expected_any": [ "sampel chunk" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Kapan batch ingestion dianggap gagal berdasarkan sampel chunk?", "expected_all": [ "12%", "masalah struktur berat" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Berapa jumlah minimal pertanyaan uji tenant sebelum go-live?", "expected_all": [ "40" ], "expected_any": [ "pertanyaan uji" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Sebutkan ambang kelulusan Answer Accuracy, Citation Correctness, No-Answer Precision, dan Access Control Accuracy sebelum go-live.", "expected_all": [ "80%", "88%", "90%", "100%" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Berapa lama hypercare berlangsung setelah tenant aktif?", "expected_all": [ "10 hari kerja" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Berapa durasi minimal pelatihan Campus Admin sebelum go-live?", "expected_all": [ "90 menit" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa syarat Support Lead tidak boleh menerima handover?", "expected_all": [ "issue P1 terbuka" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apakah role Security Reviewer boleh digabung dengan Knowledge Admin Kampus?", "expected_all": [ "tidak boleh" ], "expected_any": [ "Security Reviewer" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa status yang harus diberikan pada dokumen lama yang digantikan dokumen baru tetapi masih relevan historis?", "expected_all": [ "Deprecated" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa kode dan bobot SKS mata kuliah Dasar Pemrograman?", "expected_all": [ "IF101", "4 SKS" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa prasyarat mata kuliah Struktur Data?", "expected_all": [ "Dasar Pemrograman" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa kode dan bobot SKS mata kuliah Pemrograman Web?", "expected_all": [ "IF305", "3 SKS" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa prasyarat mata kuliah Pemrograman Web?", "expected_all": [ "Dasar Pemrograman", "Basis Data" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Pada minggu ke berapa Binary Search Tree diajarkan dalam mata kuliah Struktur Data?", "expected_all": [ "10", "Binary Search Tree" ], "expected_any": [ "minggu" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Berapa bobot final project Pemrograman Web?", "expected_all": [ "35%" ], "expected_any": [ "Final project" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Sebutkan fitur wajib final project Pemrograman Web.", "expected_all": [ "CRUD", "Autentikasi pengguna", "Session login dan logout", "Validasi input", "database relasional" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Berapa minimal kehadiran untuk mengikuti ujian akhir pada ketiga mata kuliah?", "expected_all": [ "75%" ], "expected_any": [ "kehadiran" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa aturan keterlambatan pengumpulan tugas?", "expected_all": [ "10% per hari" ], "expected_any": [ "lebih dari 3 hari", "ditolak" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apakah password boleh disimpan plaintext pada proyek web?", "expected_all": [ "plaintext" ], "expected_any": [ "tidak boleh", "tidak" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Apa fokus utama mata kuliah Struktur Data?", "expected_all": [ "penyimpanan", "pengolahan data", "efisien" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Penggunaan AI seperti apa yang diperbolehkan pada tugas pemrograman?", "expected_all": [ "alat bantu belajar" ], "expected_any": [ "Meminta penjelasan konsep", "Membantu menemukan bug", "Membantu menyusun dokumentasi awal" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Siapa nama CEO yang menyetujui SOP Implementasi Tenant RAG Hub SEVIMA?", "expected_all": [], "expected_any": [], "forbidden": [], "expect_abstain": true }, { "question": "Berapa nomor telepon darurat tim Implementation Enablement?", "expected_all": [], "expected_any": [], "forbidden": [], "expect_abstain": true }, { "question": "What are the document ID, version, and publication month for the Transformer Architecture technical reference?", "expected_all": [ "AI-TR-2024-047", "3.0", "March 2024" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "What three compounding problems did sequential RNN processing create for long sequences?", "expected_all": [ "Parallelism bottleneck", "Vanishing/exploding gradients", "Memory compression" ], "expected_any": [], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Why does scaled dot-product attention divide by sqrt(dk)?", "expected_all": [ "softmax" ], "expected_any": [ "near-zero gradients", "saturated regions", "reasonable range" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "In the Transformer base model, how many attention heads are used and what is the per-head dimension?", "expected_all": [ "8", "64" ], "expected_any": [ "heads", "dimensions" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "Why has Pre-LN become dominant in large Transformer models?", "expected_all": [ "stable gradients" ], "expected_any": [ "eliminates the need for learning rate warmup", "learning rate warmup" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "What positional encoding scheme does LLaMA use, and what does it do to query and key vectors?", "expected_all": [ "RoPE", "query", "key" ], "expected_any": [ "Rotary Positional Embeddings", "rotating query and key vectors", "complex space" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "How many parameters does BERT-Large have, and what pre-training objective does it use?", "expected_all": [ "340M" ], "expected_any": [ "Masked Language Modeling", "MLM" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "In T5 span corruption, what is the average span length and what percentage of tokens are masked?", "expected_all": [ "3", "15%" ], "expected_any": [ "span length", "tokens" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "What is QLoRA and how much GPU memory does it need to fine-tune a 65B model compared with full fine-tuning?", "expected_all": [ "4-bit", "NF4", "48 GB", "780 GB" ], "expected_any": [ "LoRA adapters", "frozen base weights" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "For a 7B LLaMA model at sequence length 4096, how much KV cache memory is required per request?", "expected_all": [ "2 GB", "4096" ], "expected_any": [ "KV cache" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "How does FlashAttention improve attention computation and what speedup does it achieve?", "expected_all": [ "IO-aware", "SRAM", "speedup" ], "expected_any": [ "2", "4" ], "forbidden": [ "i don't know" ], "expect_abstain": false }, { "question": "How does speculative decoding use a draft model, and what throughput improvement can it achieve?", "expected_all": [ "draft model", "2", "3" ], "expected_any": [ "candidate tokens", "verifies", "single forward pass" ], "forbidden": [ "i don't know" ], "expect_abstain": false } ] }