// RYP / test_parser.js
// Soumya79's picture
// Upload 1361 files
// f91a684 verified
import { MongoClient } from 'mongodb';
/**
 * Ad-hoc parser check for the "Reading Comprehension" chapter.
 *
 * Loads the chapter document from the `verbal_ability` collection of the
 * `aptitude` database, splits its `content` on question labels ("Q<n>. "),
 * and prints:
 *   1. the length and first 100 characters of the passage that precedes the
 *      first question, and
 *   2. for every question, the first 100 characters of any sizeable text
 *      found after the first paragraph of its "Explanation:" section (a
 *      candidate for the passage that introduces the following question).
 *
 * Output goes to the console only. Errors raised inside the try block are
 * logged rather than rethrown, and the client is always closed.
 *
 * @returns {Promise<void>} Resolves once the client has been closed.
 */
async function main() {
  // SECURITY: the fallback URI embeds a username and password in source code.
  // Those credentials should be rotated and removed from version control;
  // set MONGODB_URI to supply the connection string instead.
  // `||` (not `??`) is deliberate: an empty MONGODB_URI is never a valid URI.
  const uri = process.env.MONGODB_URI || "mongodb+srv://healthbuddy:healthbuddy123@healthbuddy.sdlagac.mongodb.net/";
  const client = new MongoClient(uri);

  // Text after an explanation must be longer than this to be reported.
  const minPassageLength = 50;
  // Number of characters shown when previewing a passage.
  const previewLength = 100;

  try {
    await client.connect();
    const db = client.db('aptitude');
    const coll = db.collection('verbal_ability');
    const doc = await coll.findOne({ chapter_name: "Reading Comprehension" });

    // findOne yields null when nothing matches; fail with a clear message
    // instead of a TypeError on `doc.content`.
    const raw = doc?.content;
    if (typeof raw !== 'string') {
      console.error('No "Reading Comprehension" document with string content found.');
      return;
    }

    // The capture group keeps the labels in the result:
    // [preamble, "Q1", body1, "Q2", body2, ...]
    const parts = raw.split(/(?:^|\n)(Q\d+)\.\s+/);

    const currentPassage = extractFirstPassage(parts[0]);
    console.log("PASSAGE 1 LENGTH:", currentPassage.length);
    console.log("PASSAGE 1 START:", currentPassage.substring(0, previewLength));

    for (let i = 1; i < parts.length; i += 2) {
      const qLabel = parts[i];
      const body = parts[i + 1] ?? '';
      const possibleNextPassage = findTextAfterExplanation(body);
      if (possibleNextPassage.length > minPassageLength) {
        // The label assumes questions are numbered consecutively.
        const nextNumber = Number.parseInt(qLabel.substring(1), 10) + 1;
        console.log(`\n--- FOUND PASSAGE BEFORE Q${nextNumber} ---`);
        console.log(possibleNextPassage.substring(0, previewLength));
      }
    }
  } catch (e) {
    console.error(e);
  } finally {
    await client.close();
  }
}

/**
 * Extracts the passage that precedes the first question label.
 *
 * Looks for a "<n>. PRACTICE SET ..." heading, skips the optional
 * "Each question is followed by ..." and "Type <n>" lines after it, and
 * returns the remaining text trimmed. When no such heading exists, returns
 * the last 500 characters of the preamble prefixed with "FALLBACK: ".
 *
 * @param {string} preamble - Text before the first "Q<n>. " label.
 * @returns {string} The passage text, or the marked fallback excerpt.
 */
function extractFirstPassage(preamble) {
  const pMatch = preamble.match(/(?:^|\n)\d+\.\s+PRACTICE SET[^\n]*\n(?:[^\n]*Each question is followed by[^\n]*\n)?(?:[^\n]*Type \d+[^\n]*\n)?([\s\S]*)/i);
  if (pMatch) {
    return pMatch[1].trim();
  }
  return "FALLBACK: " + preamble.slice(-500);
}

/**
 * Returns whatever follows the first paragraph of a question's explanation.
 *
 * The "Explanation:" marker is only searched for after the "Answer:" marker.
 * The explanation is taken to end at the first blank line; everything after
 * that blank line is returned, with paragraphs re-joined by a blank line and
 * the result trimmed.
 *
 * @param {string} body - Text of one question, starting after its label.
 * @returns {string} Trailing text, or '' when either marker is missing or
 *   nothing follows the explanation.
 */
function findTextAfterExplanation(body) {
  const answerIdx = body.search(/(?:^|\n)Answer:/im);
  if (answerIdx < 0) {
    return '';
  }
  const afterAnswer = body.slice(answerIdx);
  const expIdx = afterAnswer.search(/(?:^|\n)Explanation:/im);
  if (expIdx < 0) {
    return '';
  }
  // Drop the first paragraph (the explanation itself) and keep the rest.
  const sections = afterAnswer.slice(expIdx).split(/\n{2,}/);
  return sections.slice(1).join('\n\n').trim();
}
// Script entry point. Attach a rejection handler so that failures escaping
// main()'s own try/catch (anything thrown while constructing the client, or
// by close() in its finally block) are logged and reflected in the exit code
// instead of leaving the promise floating.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});