docuchat-oracle / script.js
sathaye3's picture
Not answering even though there is information in the document
ee8dccf verified
// Text of the most recently loaded document. Written by processDocument()
// once the file has been read; read by searchDocument() and askQuestion().
// The empty string means "no document loaded yet".
let documentContent = '';
/**
 * Reads an uploaded file as text and stores the result in the module-level
 * `documentContent` variable.
 *
 * Read failures and aborts are reported through the resolved value instead of
 * leaving the promise pending forever, so callers can always check `success`.
 * On failure the previously loaded document (if any) is left untouched.
 *
 * @param {Blob} file - Handed straight to `FileReader.readAsText`
 *   (presumably a File from a file input; confirm against the caller).
 * @returns {Promise<{success: boolean, error?: string}>} `{ success: true }`
 *   once the text has been stored, or `{ success: false, error }` when the
 *   read failed or was aborted.
 */
async function processDocument(file) {
  return new Promise((resolve) => {
    const reader = new FileReader();

    reader.onload = (e) => {
      documentContent = e.target.result;
      resolve({ success: true });
    };

    // Without these handlers a failed read would never settle the promise.
    const reportFailure = () => {
      const reason = reader.error && reader.error.message
        ? reader.error.message
        : 'The file could not be read.';
      resolve({ success: false, error: reason });
    };
    reader.onerror = reportFailure;
    reader.onabort = reportFailure;

    reader.readAsText(file);
  });
}
/**
 * Answers a question from the loaded document using progressively looser
 * matching strategies:
 *
 *   1. a section containing the whole question, or the subject of a
 *      "what is/are ..." or "how to/do ..." question;
 *   2. the section with the highest weighted keyword score;
 *   3. findAnyRelevant(), then a list from findPossibleSections().
 *
 * The question is lowercased, whitespace-collapsed and stripped of trailing
 * punctuation first, so "What is X?" looks for "x" rather than "x?".
 *
 * @param {string} question - The user's question.
 * @returns {string} The answer text, or a message explaining why none was found.
 */
function searchDocument(question) {
  if (!documentContent) return "Please upload a document first.";

  const questionLower = (typeof question === 'string' ? question : '')
    .toLowerCase()
    .replace(/\s+/g, ' ')
    .trim()
    .replace(/[\s?!.]+$/, '');
  if (!questionLower) return "Please enter a question.";

  // Split into meaningful units (paragraphs separated by blank lines).
  const sections = documentContent.split(/\n\s*\n+/);

  // Everything after the two-word question prefix ("what is", "how to", ...).
  const afterPrefix = questionLower.split(' ').slice(2).join(' ');
  const isWhatQuestion =
    questionLower.startsWith('what is') || questionLower.startsWith('what are');
  const isHowQuestion =
    questionLower.startsWith('how to') || questionLower.startsWith('how do');
  // "what is the mutex" should look for "mutex", not "the mutex".
  const targetTerm = afterPrefix.replace(/^(?:a|an|the) /, '');

  // Pass 1: exact or near-exact matches, first matching section wins.
  for (const section of sections) {
    const sectionLower = section.toLowerCase();

    if (sectionLower.includes(questionLower)) {
      return extractBestAnswer(section, questionLower);
    }

    // The non-empty checks matter: includes('') is true for every section.
    if (isWhatQuestion && targetTerm && sectionLower.includes(targetTerm)) {
      return extractDefinition(section, targetTerm);
    }

    if (isHowQuestion && afterPrefix && sectionLower.includes(afterPrefix)) {
      // NOTE(review): extractInstructions is not defined in the visible part
      // of this file; use it when present, otherwise fall back.
      return typeof extractInstructions === 'function'
        ? extractInstructions(section, afterPrefix)
        : extractBestAnswer(section, afterPrefix);
    }
  }

  // Pass 2: weighted keyword scoring.
  const keywords = extractKeywords(questionLower);
  let bestMatch = { text: '', score: 0 };

  for (const section of sections) {
    const sectionLower = section.toLowerCase();
    // Keywords are { word, weight } objects, so the search must use `.word`;
    // passing the object itself is coerced to "[object Object]" and never matches.
    const positions = keywords.map((keyword) => sectionLower.indexOf(keyword.word));
    let score = 0;

    keywords.forEach((keyword, i) => {
      if (positions[i] === -1) return;
      score += typeof keyword.weight === 'number' ? keyword.weight : 1;
      // Bonus for every other keyword whose first occurrence is nearby.
      positions.forEach((otherPos, j) => {
        if (j !== i && otherPos !== -1 && Math.abs(positions[i] - otherPos) < 100) {
          score += 5;
        }
      });
    });

    if (score > bestMatch.score) {
      bestMatch = { text: section, score };
    }
  }

  if (bestMatch.score > 0) {
    // NOTE(review): extractRelevantPart is not defined in the visible part of
    // this file; use it when present, otherwise fall back.
    return typeof extractRelevantPart === 'function'
      ? extractRelevantPart(bestMatch.text, keywords)
      : fallbackRelevantPart(bestMatch.text, keywords);
  }

  // Pass 3: last resort.
  const relevant = findAnyRelevant(questionLower, sections);
  if (relevant) return relevant;

  const possibleSections = findPossibleSections(questionLower, sections);
  if (!possibleSections) {
    // Do not claim to have found something when there is nothing to list.
    return "I couldn't find anything in the document related to your question. Try asking differently.";
  }
  return "I found some information that might be relevant, but couldn't find a direct answer to your question. Try asking differently or check these sections:\n" +
    possibleSections;
}

// Returns the sentence of `text` that contains the most keywords (first one
// wins ties), or the trimmed text when no sentence contains any keyword.
function fallbackRelevantPart(text, keywords) {
  const sentences = text.split(/[.!?]+/).map((s) => s.trim()).filter(Boolean);
  let best = '';
  let bestHits = 0;
  for (const sentence of sentences) {
    const sentenceLower = sentence.toLowerCase();
    const hits = keywords.filter((keyword) => sentenceLower.includes(keyword.word)).length;
    if (hits > bestHits) {
      best = sentence;
      bestHits = hits;
    }
  }
  return best ? best + '.' : text.trim();
}
// Helper functions
/**
 * Turns a question into a list of weighted search keywords.
 *
 * Words are split on any whitespace, stripped of leading/trailing punctuation
 * (so "photosynthesis?" becomes "photosynthesis"), kept only when longer than
 * three characters, and de-duplicated in first-seen order.
 *
 * Callers in this file pass an already lowercased question; the weight table
 * is keyed by lowercase words and no case folding is done here.
 *
 * @param {string} question - The question text.
 * @returns {Array<{word: string, weight: number}>} Keywords with their weight
 *   (1 unless the word is listed in the weight table).
 */
function extractKeywords(question) {
  if (typeof question !== 'string') return [];

  // A Map avoids picking up inherited properties: with a plain object,
  // weights['constructor'] is a function, not a number.
  // Note: entries of three characters or fewer can never be reached because
  // of the length filter below; they are kept for reference only.
  const weights = new Map([
    ['what', 1], ['how', 2], ['why', 2], ['when', 2], ['where', 2],
    ['is', 1], ['are', 1], ['does', 2], ['do', 1], ['can', 2],
    ['explain', 3], ['describe', 3], ['define', 3], ['list', 2],
  ]);

  const edgePunctuation = /^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu;
  const words = question
    .split(/\s+/)
    .map((w) => w.replace(edgePunctuation, ''))
    .filter((w) => w.length > 3);

  return [...new Set(words)].map((word) => ({
    word,
    weight: weights.get(word) || 1,
  }));
}
/**
 * Picks the sentence of `text` that contains `question`.
 *
 * The text is cut at runs of '.', '!' or '?'; blank fragments are discarded.
 * The first fragment whose lowercase form contains `question` is returned
 * with a trailing period. When none matches, the first three fragments are
 * joined with spaces and followed by an ellipsis.
 *
 * @param {string} text - Section text to search.
 * @param {string} question - Text to look for (compared against lowercased sentences).
 * @returns {string} The matching sentence, or a short excerpt.
 */
function extractBestAnswer(text, question) {
  const fragments = [];
  for (const piece of text.split(/[.!?]+/)) {
    const cleaned = piece.trim();
    if (cleaned) {
      fragments.push(cleaned);
    }
  }

  // Fragments are never empty, so `undefined` can only mean "no match".
  const hit = fragments.find((fragment) => fragment.toLowerCase().includes(question));
  if (hit !== undefined) {
    return hit + '.';
  }

  const excerpt = fragments.slice(0, 3);
  return excerpt.join(' ') + '...';
}
/**
 * Extracts a definition-like answer for `term` from `text`.
 *
 * Looks at the first sentence containing `term`:
 *   - if it contains the word "is" or "are", that sentence is returned;
 *   - otherwise it is returned together with the following sentence, or on
 *     its own when it is the last one.
 * When no sentence contains `term`, the first line of `text` is returned.
 *
 * @param {string} text - Section text to search.
 * @param {string} term - Term to define (compared against lowercased sentences).
 * @returns {string} The extracted sentence(s), or the first line of `text`.
 */
function extractDefinition(text, term) {
  // Whole-word match: a plain substring test also fires on "photosynthesis",
  // "this", "share", ... and would treat almost any sentence as a definition.
  const linkingVerb = /\b(?:is|are)\b/i;
  const sentences = text.split(/[.!?]+/).map((s) => s.trim()).filter(Boolean);

  for (let i = 0; i < sentences.length; i++) {
    const sentence = sentences[i];
    if (!sentence.toLowerCase().includes(term)) continue;

    if (linkingVerb.test(sentence)) {
      return sentence + '.';
    }

    // No definition verb: add the next sentence for context, restoring the
    // period that the split removed between the two.
    const next = sentences[i + 1];
    return next ? sentence + '. ' + next + '.' : sentence + '.';
  }

  return text.split('\n')[0]; // Fallback to first line
}
/**
 * Last-resort lookup: returns an excerpt of the first section that contains
 * at least half of the question's keywords.
 *
 * @param {string} question - The (lowercased) question.
 * @param {string[]} sections - Document sections to scan, in order.
 * @returns {string|null} "This might help:" followed by the first three lines
 *   of the matching section, or null when no section qualifies or the
 *   question yields no keywords.
 */
function findAnyRelevant(question, sections) {
  const keywords = extractKeywords(question).map((k) => k.word);

  // With no keywords the threshold would be 0 and every section would
  // "match", so the first section would be returned for any short question.
  if (keywords.length === 0) return null;

  const threshold = keywords.length / 2;
  for (const section of sections) {
    const sectionLower = section.toLowerCase();
    const matches = keywords.filter((keyword) => sectionLower.includes(keyword)).length;
    if (matches >= threshold) {
      return "This might help:\n" + section.split('\n').slice(0, 3).join('\n') + '...';
    }
  }
  return null;
}
/**
 * Builds a bulleted list of up to three section headings worth checking.
 *
 * A section qualifies when it contains at least one keyword of the question;
 * its heading is its first line. Empty and repeated headings are skipped.
 *
 * @param {string} question - The (lowercased) question.
 * @param {string[]} sections - Document sections to scan, in order.
 * @returns {string} Lines of the form "• heading" joined by newlines, or ''
 *   when no section qualifies.
 */
function findPossibleSections(question, sections) {
  const terms = extractKeywords(question).map(({ word }) => word);
  const headings = [];

  sections.forEach((section) => {
    const lowered = section.toLowerCase();
    const mentionsAnyTerm = terms.some((term) => lowered.includes(term));
    if (!mentionsAnyTerm) return;

    const [heading] = section.split('\n');
    if (heading && headings.indexOf(heading) === -1) {
      headings.push(heading);
    }
  });

  const bullets = headings.slice(0, 3).map((heading) => "• " + heading);
  return bullets.join('\n');
}
/**
 * Answers a question about the loaded document.
 *
 * Delegates to searchDocument(). When the answer is a bare sentence (it
 * contains "is" and no period) that appears verbatim as a sentence of the
 * document, the sentence that follows it is appended for extra context.
 *
 * @param {string} question - The user's question.
 * @returns {Promise<{answer: string}>} The answer wrapped in an object.
 */
async function askQuestion(question) {
  if (!documentContent) {
    return { answer: "Please upload a document first." };
  }

  const answer = searchDocument(question);

  // Only bare, period-free answers are candidates for enhancement.
  if (typeof answer !== 'string' || !answer.includes('is') || answer.includes('.')) {
    return { answer };
  }

  // Compare trimmed sentences directly. Appending '.' before comparing can
  // never match here, because the answer is known to contain no period.
  const sentences = documentContent.split(/[.!?]+/).map((s) => s.trim());
  const target = answer.trim();
  const idx = sentences.indexOf(target);
  const next = idx === -1 ? '' : sentences[idx + 1] || '';

  // Skip empty follow-ups (e.g. the fragment after the final period) and
  // restore the period that the split removed between the two sentences.
  const enhancedAnswer = next ? target + '. ' + next + '.' : answer;
  return { answer: enhancedAnswer };
}