// Repository page header (not part of the program): RYP / scratch_parser.js
// Uploaded by Soumya79 — "Upload 1361 files", commit f91a684 (verified)
import { MongoClient } from 'mongodb';
/**
 * Load the document with `chapter_no: 1` from `aptitude.quantitative_aptitude`,
 * drop everything from the first "Q<n>." question onward, convert the remaining
 * text to Markdown and print the first 2000 characters of the result.
 *
 * Failures are logged with `console.error` and mark the process as failed via
 * `process.exitCode`; they are not rethrown.
 *
 * @param {object} [client] - Optional MongoClient-like object exposing
 *   `connect()`, `db(name)` and `close()`. When omitted, a `MongoClient` is
 *   created from `process.env.MONGODB_URI`, falling back to the built-in URI.
 *   The client is always closed before this function resolves.
 * @returns {Promise<void>}
 */
async function main(client) {
  // NOTE(security): the fallback URI embeds a username and password in source.
  // Prefer supplying MONGODB_URI and rotate this credential.
  const mongo =
    client ??
    new MongoClient(
      process.env.MONGODB_URI ??
        "mongodb+srv://healthbuddy:healthbuddy123@healthbuddy.sdlagac.mongodb.net/",
    );

  try {
    await mongo.connect();
    const chapters = mongo.db('aptitude').collection('quantitative_aptitude');
    const doc = await chapters.findOne({ chapter_no: 1 });

    // findOne resolves to null when nothing matches; fail with a clear message
    // instead of a TypeError from dereferencing `doc.content`.
    if (typeof doc?.content !== 'string') {
      throw new Error(
        'No document with chapter_no 1 and a string `content` field was found in aptitude.quantitative_aptitude',
      );
    }

    const md = markdownify(stripQuestionSection(doc.content));
    console.log(md.substring(0, 2000)); // Print start
  } catch (e) {
    console.error(e);
    // Let shells and CI see the failure while still running the cleanup below.
    process.exitCode = 1;
  } finally {
    await mongo.close();
  }
}

/**
 * Cut the text at the first question marker ("Q1. ", "Q2. ", ...) that appears
 * at the very start of the text or right after a newline.
 *
 * @param {string} text - Raw chapter content.
 * @returns {string} The trimmed text before the first question, or `text`
 *   unchanged when no question marker is present.
 */
function stripQuestionSection(text) {
  const firstQuestion = text.search(/(?:^|\n)Q\d+\.\s+/);
  return firstQuestion === -1 ? text : text.slice(0, firstQuestion).trim();
}

/**
 * Convert plain chapter text to Markdown:
 *   1. Numbered all-caps lines ("1. CONCEPT") become `##` headings.
 *   2. Consecutive lines containing tabs become a pipe table; the first row of
 *      each run is used as the header.
 *   3. Short lines beginning with a known section label ("Type 1", "Method 2",
 *      "T3", "Core Definition", ...) become `###` headings.
 *
 * @param {string} text - Text to convert.
 * @returns {string} The Markdown rendering.
 */
function markdownify(text) {
  const subheadingLabel =
    /^(Type \d+|Method \d+|T\d+|Core Definition|Key Relationships|Basic Formulas|Percentage Change|Reverse Percentage|Successive Percentage Change|Population & Depreciation|Comparison Between Two Quantities|Expenditure & Consumption|Percentage Difference)/i;

  const withHeadings = text.replace(/^(\d+)\.\s+([A-Z0-9\s&]+)$/gm, '## $1. $2');

  const out = [];
  let inTable = false;

  for (const line of withHeadings.split('\n')) {
    if (line.includes('\t')) {
      const cells = line.split('\t').map((cell) => cell.trim());
      out.push(`| ${cells.join(' | ')} |`);
      if (!inTable) {
        // First row of a run is the header, so it needs the separator row.
        out.push(`| ${cells.map(() => '---').join(' | ')} |`);
        inTable = true;
      }
      continue;
    }

    if (inTable) {
      inTable = false;
      out.push(''); // blank line terminates the table
    }

    // A label match implies the line is non-empty and starts with a letter,
    // so only the length and trailing-punctuation heuristics remain to check.
    const isSubheading =
      line.length < 60 && !/[.,:;]$/.test(line.trim()) && subheadingLabel.test(line);
    out.push(isSubheading ? `### ${line}` : line);
  }

  return out.join('\n');
}
// Run the script. main() logs the errors raised inside its own try block; this
// handler covers whatever escapes it (client construction before the try, or a
// failing close() in its finally) so the promise is never left floating.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});