File size: 5,657 Bytes
f91a684
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139

  /** A single parsed question together with the passage that was active when it was read. */
  interface AptitudeQuestion {
    id: string;
    passage: string;
    questionText: string;
    options: string[];
    answer: string;
    explanation: string;
  }

  /** One option candidate found while scanning a line that holds several options. */
  interface InlineOptionCandidate {
    text: string;
    index: number;
    letter: string;
  }

  /**
   * Hook for passage-table formatting. Currently an identity function: the
   * text is returned unchanged.
   */
  function formatPassageTables(text: string): string {
    return text;
  }

  /**
   * Derives the initial passage from the text that precedes the first question.
   *
   * If a "PRACTICE SET" heading is present, only the text after the heading
   * (and after its optional "Each question is followed by" / "Type N" lines)
   * is considered. The last blank-line-separated section that starts with a
   * passage marker, plus everything after it, wins. Without a marker, the
   * remaining text is used when it followed a heading or is shorter than
   * 500 characters; otherwise there is no initial passage.
   */
  function extractIntroPassage(rawIntro: string): string {
    const introMarker = /^(?:Directions|Passage|Read the|Set \d+|Study the)/i;
    const practiceSetMatch = rawIntro.match(/(?:^|\n)[*#\s]*(?:\d+\.)?[*#\s]*PRACTICE SET[^\n]*\n(?:[^\n]*Each question is followed by[^\n]*\n)?(?:[^\n]*Type \d+[^\n]*\n)?([\s\S]*)/i);
    const candidate = practiceSetMatch ? practiceSetMatch[1].trim() : rawIntro.trim();
    const sections = candidate.split(/\n{2,}/);

    // Walk backwards so the marker closest to the first question is preferred.
    for (let j = sections.length - 1; j >= 0; j--) {
      if (introMarker.test(sections[j].trim())) {
        return sections.slice(j).join('\n\n');
      }
    }

    if (practiceSetMatch || candidate.length < 500) return candidate;
    return '';
  }

  /**
   * Extracts answer options from the question part of a block.
   *
   * First looks for options that each start their own line ("A) ..." or
   * "a. ..."). When fewer than two are found that way, falls back to options
   * sharing one line ("A) 1 B) 2 C) 3 D) 4"), accepting only a run that starts
   * at A and continues in alphabetical order.
   *
   * @returns the option texts and the offset in `questionPart` where the first
   *   option starts, or an empty list and -1 when no options were recognised.
   */
  function extractOptions(questionPart: string): { options: string[]; firstOptPos: number } {
    const lineOptRegex = /^[ \t]*([A-Ea-e][.)]\s+[^\n]*)/gm;
    const lineOptions: string[] = [];
    let firstLinePos = -1;
    let m: RegExpExecArray | null;
    while ((m = lineOptRegex.exec(questionPart)) !== null) {
      if (firstLinePos < 0) firstLinePos = m.index;
      lineOptions.push(m[1].trim());
    }
    // A single line-start hit is discarded and the inline scan gets a chance instead.
    if (lineOptions.length >= 2) {
      return { options: lineOptions, firstOptPos: firstLinePos };
    }

    const inlineOptRegex = /(?:^|\s+)([A-Ea-e][.)]\s+(?:(?!\s+[A-Ea-e][.)]\s+)[\s\S])*)/g;
    const candidates: InlineOptionCandidate[] = [];
    while ((m = inlineOptRegex.exec(questionPart)) !== null) {
      candidates.push({
        text: m[1].trim(),
        index: m.index,
        letter: m[1].charAt(0).toUpperCase(),
      });
    }

    const startIdx = candidates.findIndex((c) => c.letter === 'A');
    if (startIdx >= 0) {
      const run: InlineOptionCandidate[] = [];
      let expectedLetterCode = 65; // 'A'
      for (let k = startIdx; k < candidates.length; k++) {
        if (candidates[k].letter.charCodeAt(0) !== expectedLetterCode) break;
        run.push(candidates[k]);
        expectedLetterCode++;
      }
      if (run.length >= 2) {
        return { options: run.map((c) => c.text), firstOptPos: run[0].index };
      }
    }

    return { options: [], firstOptPos: -1 };
  }

  /**
   * Splits the text from "Answer:" onwards into the raw answer, the
   * explanation, and the passage that should apply to the following question.
   *
   * Text after the first blank line of the explanation is treated as a new
   * passage when it starts with a passage marker, contains a pipe-delimited
   * table row, or is longer than 250 characters; otherwise it is appended to
   * the explanation. Without an "Explanation:" line the whole input is the
   * raw answer and the passage is carried over unchanged.
   */
  function splitAnswerSection(
    afterAnswer: string,
    currentPassage: string,
  ): { answerRaw: string; explanation: string; nextPassage: string } {
    const expIdx = afterAnswer.search(/(?:^|\n)Explanation:/im);
    if (expIdx < 0) {
      return { answerRaw: afterAnswer, explanation: '', nextPassage: currentPassage };
    }

    const sections = afterAnswer.slice(expIdx).split(/\n{2,}/);
    let explanation = sections[0].replace(/(?:^|\n)Explanation:\s*/im, '').trim();
    let nextPassage = currentPassage;

    const trailing = sections.slice(1).join('\n\n').trim();
    if (trailing.length > 0) {
      const isPassageMarker = /^(?:Directions|Passage|Read the|Set \d+|Use the following|BAR GRAPH|PIE CHART|LINE GRAPH|CASELET|MIXED DI|RADAR CHART|Student|Department|A company|The following|Refer to|Study the)/i.test(trailing);
      // Three or more cells means the line holds at least two column separators.
      const hasPipeTable = trailing.split('\n').some((line) => line.split(/\||│/).length >= 3);
      if (isPassageMarker || hasPipeTable || trailing.length > 250) {
        nextPassage = trailing;
      } else {
        explanation += '\n\n' + trailing;
      }
    }

    return { answerRaw: afterAnswer.slice(0, expIdx), explanation, nextPassage };
  }

  /**
   * Parses raw aptitude practice content into a list of questions.
   *
   * Everything from a numbered "QUICK REVISION CARD" heading onwards is
   * ignored. The remaining text is split on "Q<n>." labels at the start of a
   * line; each block is broken into question text, options, answer and
   * explanation. A passage found before the first question, or after a
   * question's explanation, is attached to the questions that follow it.
   *
   * @param raw the full text to parse
   * @returns the parsed questions in document order; blocks without question
   *   text are omitted
   */
  function parseAptitudeContent(raw: string): AptitudeQuestion[] {
    const answerStart = /(?:^|\n)Answer:/im;
    const answerLabel = /(?:^|\n)Answer:\s*/im;

    let contentToParse = raw;
    const revisionMatch = contentToParse.match(/(?:^|\n)\d+\.\s+QUICK REVISION CARD/i);
    if (revisionMatch && revisionMatch.index !== undefined) {
      contentToParse = contentToParse.slice(0, revisionMatch.index);
    }

    contentToParse = formatPassageTables(contentToParse);

    // The capture group keeps the labels: [intro, label1, body1, label2, body2, ...].
    const parts = contentToParse.split(/(?:^|\n)(Q\d+)\.\s+/);
    const questions: AptitudeQuestion[] = [];
    let currentPassage = extractIntroPassage(parts[0] ?? '');

    for (let i = 1; i < parts.length; i += 2) {
      const qLabel = parts[i];
      const body = parts[i + 1] ?? '';

      const answerIdx = body.search(answerStart);
      const questionPart = answerIdx >= 0 ? body.slice(0, answerIdx) : body;
      const afterAnswer = answerIdx >= 0 ? body.slice(answerIdx) : '';

      const extracted = extractOptions(questionPart);
      let options = extracted.options;
      const firstOptPos = extracted.firstOptPos;

      // NOTE(review): options starting at offset 0 leave the whole part as question text.
      const questionText = firstOptPos > 0 ? questionPart.slice(0, firstOptPos).trim() : questionPart.trim();

      const { answerRaw, explanation, nextPassage } = splitAnswerSection(afterAnswer, currentPassage);
      const answer = answerRaw.replace(answerLabel, '').trim();

      // No option text found: synthesise bare letters when the question refers to
      // "(A)"-style choices or the answer is a single letter.
      if (options.length === 0) {
        const mentionsLetterChoice = /\([A-Ea-e]\)/.test(questionText);
        const answerIsLetter = /^[A-Ea-e]$/.test(answer);
        if (mentionsLetterChoice || answerIsLetter) {
          const needsFifth = /\([Ee]\)/.test(questionText) || answer.toUpperCase() === 'E';
          const numOpts = needsFifth ? 5 : 4;
          options = Array.from({ length: numOpts }, (_, j) => String.fromCharCode(65 + j));
        }
      }

      // NOTE(review): a skipped block also drops any passage found after its explanation.
      if (!questionText) continue;
      questions.push({ id: `${qLabel}-${i}`, passage: currentPassage, questionText, options, answer, explanation });
      currentPassage = nextPassage;
    }
    return questions;
  }
  
  // Smoke test: a data-sufficiency question whose five options share a single line.
  const testText = `Q1. What were the total sales in 2020?
Statement 1: Q3 sales in 2020 were ₹35 lakhs.
Statement 2: Total sales in 2021 were ₹140 lakhs.
A) Statement 1 alone   B) Statement 2 alone   C) Both together   D) Either alone   E) Neither
Answer: A) Statement 1 alone
Explanation: 2020 total = 25+30+Q3+20. With Q3=35 → total=110. St1 alone sufficient. Answer A.`;

  // Parse the sample, then print the result as JSON indented by two spaces.
  const parsedSample = parseAptitudeContent(testText);
  const prettyPrinted = JSON.stringify(parsedSample, null, 2);
  console.log(prettyPrinted);