sathaye3 committed on
Commit
ee8dccf
·
verified ·
1 Parent(s): 601b758

Not answering even though there is information in the document

Browse files
Files changed (1) hide show
  1. script.js +140 -35
script.js CHANGED
@@ -13,66 +13,171 @@ async function processDocument(file) {
13
  reader.readAsText(file);
14
  });
15
  }
16
/**
 * Searches the uploaded document for the passage that best answers a question.
 *
 * Strategy:
 *   1. Return the sentence that contains the whole question or, for
 *      "what is/are X" questions, the term X.
 *   2. Otherwise score each paragraph by how many significant question words
 *      (longer than three characters) it contains, and return the sentence of
 *      the best paragraph that contains the most of them.
 *
 * Reads the file-level `documentContent` string.
 *
 * @param {string} question - The user's question.
 * @returns {string} An answer sentence or paragraph, or a guidance message
 *   when no document is loaded or nothing relevant is found.
 */
function searchDocument(question) {
  if (!documentContent) return "Please upload a document first.";

  const notFound =
    "I couldn't find a specific answer in the document. Please try rephrasing your question.";

  // A trailing "?" or "." would otherwise defeat every substring comparison.
  const questionLower = String(question || '')
    .toLowerCase()
    .trim()
    .replace(/[?!.\s]+$/, '');
  if (!questionLower) return notFound;

  const paragraphs = documentContent
    .split(/\n\s*\n/)
    .map((paragraph) => paragraph.trim())
    .filter((paragraph) => paragraph.length > 0);

  const toSentences = (paragraph) =>
    paragraph
      .split(/[.!?]+/)
      .map((sentence) => sentence.trim())
      .filter((sentence) => sentence.length > 0);

  // Only derive a term when the question really has the "what is X" shape;
  // searching for a missing term would look for the literal text "undefined".
  const whatIs = /^what\s+(?:is|are)\s+(.+)$/.exec(questionLower);
  const needles = whatIs ? [questionLower, whatIs[1]] : [questionLower];

  for (const paragraph of paragraphs) {
    const paraLower = paragraph.toLowerCase();
    const needle = needles.find((candidate) => paraLower.includes(candidate));
    if (needle === undefined) continue;

    const hit = toSentences(paragraph).find((sentence) =>
      sentence.toLowerCase().includes(needle)
    );
    // The needle may straddle a sentence break; then the paragraph is the answer.
    return hit ? hit + '.' : paragraph;
  }

  // No direct match: fall back to keyword overlap.
  const keywords = [
    ...new Set(
      questionLower.split(/[\s,;:!?.()"']+/).filter((word) => word.length > 3)
    ),
  ];
  const countHits = (textLower) =>
    keywords.reduce((hits, word) => (textLower.includes(word) ? hits + 1 : hits), 0);

  let bestParagraph = '';
  let bestScore = 0;
  for (const paragraph of paragraphs) {
    const score = countHits(paragraph.toLowerCase());
    if (score > bestScore) {
      bestScore = score;
      bestParagraph = paragraph;
    }
  }
  if (bestScore === 0) return notFound;

  // Pick the sentence with the most keyword hits rather than relying on the
  // first question word, which is usually a stop word such as "what".
  let bestSentence = '';
  let bestSentenceScore = 0;
  for (const sentence of toSentences(bestParagraph)) {
    const score = countHits(sentence.toLowerCase());
    if (score > bestSentenceScore) {
      bestSentenceScore = score;
      bestSentence = sentence;
    }
  }
  return bestSentence ? bestSentence + '.' : bestParagraph;
}
77
  async function askQuestion(question) {
78
  if (!documentContent) {
 
13
  reader.readAsText(file);
14
  });
15
  }
16
/**
 * Searches the uploaded document for the best answer to a question.
 *
 * Stages, in order:
 *   1. Per section: whole-question match, then the term of a "what is/are"
 *      question, then the action of a "how to/do" question.
 *   2. Weighted keyword scoring across sections, with a bonus when two
 *      keywords first occur within 100 characters of each other.
 *   3. `findAnyRelevant`, then a list of suggestions from
 *      `findPossibleSections`.
 *
 * Reads the file-level `documentContent` string and calls the sibling helpers
 * `extractKeywords`, `extractBestAnswer`, `extractDefinition`,
 * `findAnyRelevant` and `findPossibleSections`.
 *
 * NOTE(review): `extractInstructions` and `extractRelevantPart` are not
 * defined in the visible part of the file. They are used when present;
 * otherwise the matched section text is returned instead of throwing a
 * ReferenceError. Confirm whether they exist elsewhere.
 *
 * @param {string} question - The user's question.
 * @returns {string} The answer text or a guidance message.
 */
function searchDocument(question) {
  if (!documentContent) return "Please upload a document first.";

  // Trailing punctuation ("what is x?") would otherwise end up inside the
  // search term and prevent every substring match.
  const questionLower = String(question || '')
    .toLowerCase()
    .trim()
    .replace(/[?!.\s]+$/, '');

  // Split into meaningful units (paragraphs or sections).
  const sections = documentContent.split(/\n\s*\n+/);

  // Everything after the two-word question prefix, or '' when it does not apply.
  const termAfter = (prefixes) =>
    prefixes.some((prefix) => questionLower.startsWith(prefix))
      ? questionLower.split(/\s+/).slice(2).join(' ')
      : '';
  const definitionTerm = termAfter(['what is', 'what are']);
  const actionTerm = termAfter(['how to', 'how do']);

  // Stage 1: exact or near-exact matches. Empty terms are skipped because
  // `includes('')` is true for every section.
  for (const section of sections) {
    const sectionLower = section.toLowerCase();

    if (questionLower && sectionLower.includes(questionLower)) {
      return extractBestAnswer(section, questionLower);
    }
    if (definitionTerm && sectionLower.includes(definitionTerm)) {
      return extractDefinition(section, definitionTerm);
    }
    if (actionTerm && sectionLower.includes(actionTerm)) {
      return typeof extractInstructions === 'function'
        ? extractInstructions(section, actionTerm)
        : section.trim();
    }
  }

  // Stage 2: weighted keyword scoring. Keywords are { word, weight } objects,
  // so the text to search for is `keyword.word`, never the object itself.
  const keywords = extractKeywords(questionLower);
  let bestMatch = { text: '', score: 0 };

  for (const section of sections) {
    const sectionLower = section.toLowerCase();
    const positions = keywords.map((keyword) => sectionLower.indexOf(keyword.word));
    let score = 0;

    for (let i = 0; i < keywords.length; i++) {
      if (positions[i] === -1) continue;
      score += keywords[i].weight;

      // Bonus for proximity to other keywords.
      for (let j = 0; j < keywords.length; j++) {
        if (j !== i && positions[j] !== -1 && Math.abs(positions[i] - positions[j]) < 100) {
          score += 5;
        }
      }
    }

    if (score > bestMatch.score) {
      bestMatch = { text: section, score };
    }
  }

  if (bestMatch.score > 0) {
    return typeof extractRelevantPart === 'function'
      ? extractRelevantPart(bestMatch.text, keywords)
      : bestMatch.text.trim();
  }

  // Stage 3: last resort.
  const relevant = findAnyRelevant(questionLower, sections);
  if (relevant) return relevant;

  const suggestions = findPossibleSections(questionLower, sections);
  if (!suggestions) {
    // Do not claim to have found relevant sections when there are none to list.
    return "I couldn't find a direct answer to your question in the document. Try asking differently.";
  }
  return "I found some information that might be relevant, but couldn't find a direct answer to your question. Try asking differently or check these sections:\n" +
    suggestions;
}
93
+
94
+ // Helper functions
95
/**
 * Extracts weighted search keywords from a question.
 *
 * Words are lower-cased, stripped of surrounding punctuation, de-duplicated,
 * and kept only when longer than three characters. Each keyword gets the
 * weight listed in the table below, or 1 when it is not listed.
 *
 * @param {string} question - The question text.
 * @returns {{word: string, weight: number}[]} Keywords in order of first
 *   appearance.
 */
function extractKeywords(question) {
  // A Map rather than an object literal: indexing a plain object with words
  // such as "constructor" or "valueOf" returns inherited functions, which
  // would then be used as the weight.
  const weights = new Map([
    ['what', 1], ['how', 2], ['why', 2], ['when', 2], ['where', 2],
    ['is', 1], ['are', 1], ['does', 2], ['do', 1], ['can', 2],
    ['explain', 3], ['describe', 3], ['define', 3], ['list', 2],
  ]);
  // "photosynthesis?" must become "photosynthesis" to match document text.
  const edgePunctuation = /^[.,;:!?"'()[\]{}]+|[.,;:!?"'()[\]{}]+$/g;

  const seen = new Set();
  const keywords = [];
  for (const raw of String(question || '').toLowerCase().split(/\s+/)) {
    const word = raw.replace(edgePunctuation, '');
    if (word.length <= 3 || seen.has(word)) continue;
    seen.add(word);
    keywords.push({ word, weight: weights.has(word) ? weights.get(word) : 1 });
  }
  return keywords;
}
108
+
109
/**
 * Returns the sentence of `text` that contains `question`, or a short preview
 * of the text when no single sentence does.
 *
 * Sentences end at `.`, `!` or `?` only when followed by whitespace or the end
 * of the text, so decimals ("3.14") and dotted names ("node.js") stay intact.
 *
 * @param {string} text - The section to search.
 * @param {string} question - Lower-cased text to look for.
 * @returns {string} The matching sentence with its own terminator (a "." is
 *   added if it has none), or the first three sentences followed by "...".
 */
function extractBestAnswer(text, question) {
  const sentences = (text.match(/\S[\s\S]*?(?:[.!?]+(?=\s|$)|$)/g) || [])
    .map((sentence) => sentence.trim())
    .filter((sentence) => sentence.length > 0);

  // Find the most relevant sentence.
  const hit = sentences.find((sentence) => sentence.toLowerCase().includes(question));
  if (hit !== undefined) {
    return /[.!?]$/.test(hit) ? hit : hit + '.';
  }

  // If no exact match, return the first few sentences. The last terminator is
  // dropped so the ellipsis does not read as "....".
  return sentences.slice(0, 3).join(' ').replace(/[.!?]+$/, '') + '...';
}
123
+
124
/**
 * Extracts a definition-like passage for `term` from `text`.
 *
 * Preference order:
 *   1. The first sentence mentioning the term that also contains the whole
 *      word "is" or "are".
 *   2. The first sentence mentioning the term, joined with the sentence after
 *      it when there is one.
 *   3. The first line of the text, when the term is not mentioned at all.
 *
 * @param {string} text - The section to search.
 * @param {string} term - The term being defined (matched case-insensitively).
 * @returns {string} The extracted passage.
 */
function extractDefinition(text, term) {
  const termLower = String(term || '').toLowerCase();
  // Split only at terminators followed by whitespace/end, keeping the terminator.
  const sentences = (text.match(/\S[\s\S]*?(?:[.!?]+(?=\s|$)|$)/g) || [])
    .map((sentence) => sentence.trim())
    .filter((sentence) => sentence.length > 0);
  const terminated = (sentence) => (/[.!?]$/.test(sentence) ? sentence : sentence + '.');

  const mentions = sentences
    .map((_, index) => index)
    .filter((index) => sentences[index].toLowerCase().includes(termLower));

  // Word boundaries matter: a bare substring test finds "is" inside "this"
  // and "exists", which made almost every sentence look like a definition.
  const copula = /\b(?:is|are)\b/i;
  const definitionIndex = mentions.find((index) => copula.test(sentences[index]));
  if (definitionIndex !== undefined) {
    return terminated(sentences[definitionIndex]);
  }

  if (mentions.length > 0) {
    const first = mentions[0];
    const next = sentences[first + 1];
    return terminated(next ? sentences[first] + ' ' + next : sentences[first]);
  }

  return text.split('\n')[0]; // Fallback to first line
}
140
+
141
/**
 * Last-resort lookup: returns a preview of the section that contains the most
 * question keywords, provided it contains at least half of them.
 *
 * Calls the sibling helper `extractKeywords` and uses the `word` of each
 * keyword it returns.
 *
 * @param {string} question - Lower-cased question text.
 * @param {string[]} sections - Document sections.
 * @returns {string|null} "This might help:" followed by the first three lines
 *   of the chosen section, or null when nothing qualifies.
 */
function findAnyRelevant(question, sections) {
  const keywords = extractKeywords(question).map((k) => k.word);

  // With no keywords the threshold test degenerates to 0 >= 0, which would
  // hand back the first section no matter what it contains.
  if (keywords.length === 0) return null;

  let bestSection = null;
  let bestMatches = 0;
  for (const section of sections) {
    const sectionLower = section.toLowerCase();
    const matches = keywords.filter((keyword) => sectionLower.includes(keyword)).length;
    // Strict ">" keeps the earliest section on ties.
    if (matches > bestMatches) {
      bestMatches = matches;
      bestSection = section;
    }
  }

  if (bestSection === null || bestMatches < keywords.length / 2) return null;

  return "This might help:\n" + bestSection.split('\n').slice(0, 3).join('\n') + '...';
}
159
+
160
/**
 * Builds a bulleted list of up to three section headings that may relate to
 * the question.
 *
 * A section qualifies when it contains at least one question keyword. Its
 * heading is its first non-blank line, trimmed. Headings are listed from most
 * to fewest keyword matches (document order on ties), without duplicates.
 *
 * Calls the sibling helper `extractKeywords` and uses the `word` of each
 * keyword it returns.
 *
 * @param {string} question - Lower-cased question text.
 * @param {string[]} sections - Document sections.
 * @returns {string} Lines of the form "• heading" joined by newlines, or an
 *   empty string when no section qualifies.
 */
function findPossibleSections(question, sections) {
  const keywords = extractKeywords(question).map((k) => k.word);
  const candidates = [];

  for (const section of sections) {
    const sectionLower = section.toLowerCase();
    const matches = keywords.filter((keyword) => sectionLower.includes(keyword)).length;
    if (matches === 0) continue;

    // Skip leading blank lines so the bullet is never empty or whitespace.
    const heading = section
      .split('\n')
      .map((line) => line.trim())
      .find((line) => line.length > 0);
    if (heading) candidates.push({ heading, matches });
  }

  // Array.prototype.sort is stable, so equal scores keep document order.
  candidates.sort((a, b) => b.matches - a.matches);
  const uniqueHeadings = [...new Set(candidates.map((candidate) => candidate.heading))];

  return uniqueHeadings.slice(0, 3).map((heading) => "• " + heading).join('\n');
}
182
  async function askQuestion(question) {
183
  if (!documentContent) {