Spaces:
Running
Running
// Document content storage: raw text of the most recently loaded document.
// Read by the search helpers; an empty string means "nothing uploaded yet".
let documentContent = '';

// Process uploaded document
/**
 * Reads an uploaded file as text and stores it in `documentContent`.
 *
 * The returned promise always settles once the reader reports an outcome:
 * a failed or aborted read resolves with `success: false` instead of
 * leaving the caller waiting forever. `documentContent` is only replaced
 * on a successful read.
 *
 * @param {Blob} file - File (or Blob) selected by the user.
 * @returns {Promise<{success: boolean, error?: string}>} `{ success: true }`
 *   after the text was stored, otherwise `{ success: false, error }` with a
 *   human-readable reason.
 */
async function processDocument(file) {
  return new Promise((resolve) => {
    const reader = new FileReader();

    reader.onload = (e) => {
      documentContent = e.target.result;
      resolve({ success: true });
    };

    // Without these handlers a failed read would never settle the promise.
    const reportFailure = () => {
      const reason = reader.error && reader.error.message
        ? reader.error.message
        : 'Failed to read the document.';
      resolve({ success: false, error: reason });
    };
    reader.onerror = reportFailure;
    reader.onabort = reportFailure;

    // Passing something that is not a Blob makes readAsText throw, which
    // rejects the promise (same as before this handler was added).
    reader.readAsText(file);
  });
}
// Enhanced document search with better matching and context
/**
 * Answers a question from the uploaded document (`documentContent`).
 *
 * Strategy, in order:
 *  1. Per section: literal match of the whole question, then a
 *     "what is/are …" definition lookup, then a "how to/do …" lookup.
 *  2. Weighted keyword scoring across all sections, with a proximity bonus
 *     when two different keywords first occur within 100 characters.
 *  3. `findAnyRelevant`, and finally a hint message listing candidate
 *     sections from `findPossibleSections`.
 *
 * NOTE(review): `extractInstructions` and `extractRelevantPart` are not
 * defined in the visible part of this file; they are called exactly as
 * before (the latter still receives the keyword objects) — confirm their
 * contracts where they are defined.
 *
 * @param {string} question - The user's question.
 * @returns {string} The best answer found, or a guidance message.
 */
function searchDocument(question) {
  if (!documentContent) return "Please upload a document first.";

  const questionLower = question.toLowerCase();

  // Split into meaningful units (paragraphs or sections)
  const sections = documentContent.split(/\n\s*\n+/);

  // Everything after the two-word lead-in ("what is", "how to", ...), with
  // trailing punctuation removed so "what is foo?" looks up "foo".
  const subject = questionLower
    .split(' ')
    .slice(2)
    .join(' ')
    .replace(/[\s?!.]+$/, '');
  const asksDefinition =
    questionLower.startsWith('what is') || questionLower.startsWith('what are');
  const asksHowTo =
    questionLower.startsWith('how to') || questionLower.startsWith('how do');

  // First try exact matches or very close matches
  for (const section of sections) {
    const sectionLower = section.toLowerCase();

    // Check for direct question matches
    if (sectionLower.includes(questionLower)) {
      return extractBestAnswer(section, questionLower);
    }

    // An empty subject would match every section, so it is skipped.
    if (subject && sectionLower.includes(subject)) {
      if (asksDefinition) return extractDefinition(section, subject);
      if (asksHowTo) return extractInstructions(section, subject);
    }
  }

  // Try semantic matching with weighted keywords
  const keywords = extractKeywords(questionLower);
  let bestMatch = { text: '', score: 0 };

  for (const section of sections) {
    const sectionLower = section.toLowerCase();

    // Keywords are { word, weight } objects: match on the word itself.
    const hits = keywords
      .map((keyword) => ({ keyword, pos: sectionLower.indexOf(keyword.word) }))
      .filter((hit) => hit.pos !== -1);

    let score = 0;
    for (const hit of hits) {
      score += hit.keyword.weight;
      // Bonus for proximity to other keywords
      for (const other of hits) {
        if (
          other.keyword.word !== hit.keyword.word &&
          Math.abs(hit.pos - other.pos) < 100
        ) {
          score += 5;
        }
      }
    }

    if (score > bestMatch.score) {
      bestMatch = { text: section, score };
    }
  }

  if (bestMatch.score > 0) {
    return extractRelevantPart(bestMatch.text, keywords);
  }

  // As last resort, try to find any relevant information
  const relevant = findAnyRelevant(questionLower, sections);
  if (relevant) return relevant;

  return "I found some information that might be relevant, but couldn't find a direct answer to your question. Try asking differently or check these sections:\n" +
    findPossibleSections(questionLower, sections);
}
// Helper functions

/**
 * Splits a question into weighted keywords.
 *
 * Tokens are separated on any whitespace, stripped of surrounding quotes,
 * brackets and punctuation, and kept only when longer than three
 * characters. Words listed in the weight table get that weight; every
 * other word gets weight 1.
 *
 * @param {string} question - Question text (callers pass it lower-cased;
 *   the weight table only contains lower-case entries).
 * @returns {Array<{word: string, weight: number}>} Keywords in question order.
 */
function extractKeywords(question) {
  // A Map avoids inherited Object.prototype members: with a plain object,
  // a word such as "constructor" would yield a function as its weight.
  // Entries of three characters or fewer are currently unreachable because
  // of the length filter below; they are kept in case that filter changes.
  const weights = new Map([
    ['what', 1], ['how', 2], ['why', 2], ['when', 2], ['where', 2],
    ['is', 1], ['are', 1], ['does', 2], ['do', 1], ['can', 2],
    ['explain', 3], ['describe', 3], ['define', 3], ['list', 2],
  ]);

  return question
    .split(/\s+/)
    // "python?" and "(python)" should both match "python" in the document.
    .map((token) => token.replace(/^["'(\[{]+|["')\]}.,;:!?]+$/g, ''))
    .filter((word) => word.length > 3)
    .map((word) => ({
      word,
      weight: weights.get(word) || 1,
    }));
}
/**
 * Picks the sentence of `text` that best answers `question`.
 *
 * Matching is case-insensitive. The first sentence containing the question
 * wins. If the question itself contains sentence punctuation (for example
 * "node.js") no single split sentence can contain it, so the span of text
 * around the first literal occurrence is returned instead. When nothing
 * matches, the first three sentences are returned followed by "...".
 *
 * @param {string} text - Section of the document to search.
 * @param {string} question - Phrase to look for.
 * @returns {string} The matching sentence terminated with ".", or the
 *   leading sentences followed by "...".
 */
function extractBestAnswer(text, question) {
  const needle = question.toLowerCase();

  // Split into sentences
  const sentences = text
    .split(/[.!?]+/)
    .map((s) => s.trim())
    .filter((s) => s);

  // Find the most relevant sentence
  const match = sentences.find((s) => s.toLowerCase().includes(needle));
  if (match !== undefined) {
    return match + '.';
  }

  // The phrase may straddle the punctuation used for splitting; locate it in
  // the raw text and widen to the surrounding sentence boundaries. The length
  // check guards against case-folding that changes string offsets.
  const lowerText = text.toLowerCase();
  const at = lowerText.indexOf(needle);
  if (at !== -1 && lowerText.length === text.length) {
    const before = lowerText.slice(0, at);
    const start = Math.max(
      before.lastIndexOf('.'),
      before.lastIndexOf('!'),
      before.lastIndexOf('?')
    ) + 1;
    const afterStart = at + needle.length;
    const nextStop = lowerText.slice(afterStart).search(/[.!?]/);
    const end = nextStop === -1 ? text.length : afterStart + nextStop;
    const span = text.slice(start, end).trim();
    if (span) return span + '.';
  }

  // If no exact match, return first few sentences
  return sentences.slice(0, 3).join(' ') + '...';
}
/**
 * Extracts a definition-like answer for `term` from `text`.
 *
 * Preference order:
 *  1. the first sentence that mentions the term and contains the word
 *     "is" or "are" (whole words, so "this" or "share" do not count);
 *  2. the first sentence that mentions the term, joined with the sentence
 *     that follows it (if any);
 *  3. the first line of the text.
 *
 * @param {string} text - Section of the document to search.
 * @param {string} term - Term to define; matched case-insensitively.
 * @returns {string} The extracted sentence(s), or the first line of `text`.
 */
function extractDefinition(text, term) {
  const needle = term.toLowerCase();
  const sentences = text
    .split(/[.!?]+/)
    .map((s) => s.trim())
    .filter((s) => s);
  const mentionsTerm = (sentence) => sentence.toLowerCase().includes(needle);

  // Try to get the definition sentence
  const definition = sentences.find(
    (sentence) => mentionsTerm(sentence) && /\b(?:is|are)\b/i.test(sentence)
  );
  if (definition !== undefined) {
    return definition + '.';
  }

  // Or combine with next sentence
  const idx = sentences.findIndex(mentionsTerm);
  if (idx !== -1) {
    const next = sentences[idx + 1];
    return next === undefined
      ? sentences[idx] + '.'
      : sentences[idx] + '. ' + next + '.';
  }

  return text.split('\n')[0]; // Fallback to first line
}
/**
 * Last-resort lookup: returns the start of the first section that contains
 * at least half of the question's keywords.
 *
 * @param {string} question - Lower-cased question text.
 * @param {string[]} sections - Document sections to scan, in order.
 * @returns {string|null} "This might help:" followed by up to three lines
 *   of the section and "...", or `null` when no section qualifies or the
 *   question yields no keywords.
 */
function findAnyRelevant(question, sections) {
  const keywords = extractKeywords(question).map((k) => k.word);

  // With no keywords the threshold below would be 0 and the first section
  // would always be reported as relevant.
  if (keywords.length === 0) return null;

  for (const section of sections) {
    const sectionLower = section.toLowerCase();
    const matches = keywords.filter((keyword) => sectionLower.includes(keyword)).length;

    if (matches >= keywords.length / 2) {
      return "This might help:\n" + section.split('\n').slice(0, 3).join('\n') + '...';
    }
  }

  return null;
}
/**
 * Lists headings of sections that mention at least one question keyword.
 *
 * A section's heading is its first non-blank line, trimmed, so a leading
 * blank line or a trailing carriage return neither hides a section nor
 * defeats de-duplication.
 *
 * @param {string} question - Lower-cased question text.
 * @param {string[]} sections - Document sections to scan, in order.
 * @returns {string} Up to three distinct headings, one per line, each
 *   prefixed with "• "; an empty string when nothing matches.
 */
function findPossibleSections(question, sections) {
  const MAX_SUGGESTIONS = 3;
  const keywords = extractKeywords(question).map((k) => k.word);
  const headings = [];

  for (const section of sections) {
    if (headings.length === MAX_SUGGESTIONS) break;

    const sectionLower = section.toLowerCase();
    if (!keywords.some((keyword) => sectionLower.includes(keyword))) continue;

    const heading = section
      .split('\n')
      .map((line) => line.trim())
      .find((line) => line !== '');
    if (heading && !headings.includes(heading)) {
      headings.push(heading);
    }
  }

  return headings.map((s) => "• " + s).join('\n');
}
/**
 * Answers a question about the uploaded document.
 *
 * Delegates to `searchDocument`. When the answer is a single one-line
 * sentence containing the word "is" (a bare definition), the sentence that
 * follows it in the same paragraph of the document is appended for context.
 *
 * @param {string} question - The user's question.
 * @returns {Promise<{answer: string}>} The (possibly enhanced) answer.
 */
async function askQuestion(question) {
  if (!documentContent) {
    return { answer: "Please upload a document first." };
  }

  const answer = searchDocument(question);

  // Enhance simple answers with more context
  let enhancedAnswer = answer;
  const isLoneSentence =
    typeof answer === 'string' && /^[^.!?\n]+\.$/.test(answer);

  if (isLoneSentence && /\bis\b/i.test(answer)) {
    // Look paragraph by paragraph so the added context never crosses into
    // the next section of the document.
    for (const section of documentContent.split(/\n\s*\n+/)) {
      const sentences = section.split(/[.!?]+/).map((s) => s.trim());
      const idx = sentences.findIndex((s) => s + '.' === answer);
      if (idx === -1) continue;

      // Try to find the next sentence for more context
      const next = sentences[idx + 1];
      if (next) {
        enhancedAnswer += ' ' + next + '.';
      }
      break;
    }
  }

  return { answer: enhancedAnswer };
}