Spaces:

sathaye3
/

docuchat-oracle

Running

App Files Files Community

docuchat-oracle / script.js

sathaye3's picture

Not answering even though there is information in the document

ee8dccf verified 4 months ago

history blame contribute delete

6.88 kB


	// Document content storage
	// Holds the raw text of the most recently loaded document; '' until a file has been read.
	// Assigned in processDocument and read by searchDocument and askQuestion,
	// so it must stay a reassignable `let`.
	let documentContent = '';

	// Process uploaded document
	/**
	 * Reads an uploaded file as text and stores the result in `documentContent`.
	 *
	 * The returned promise always settles: a failed or aborted read resolves with
	 * `success: false` instead of leaving the caller waiting forever.
	 *
	 * @param {Blob} file - The file (or blob) handed to `FileReader.readAsText`.
	 * @returns {Promise<{success: boolean, error?: *}>} `{ success: true }` once the
	 *   text is stored, or `{ success: false, error }` when the read fails or is aborted.
	 */
	async function processDocument(file) {
		return new Promise((resolve) => {
			const reader = new FileReader();

			reader.onload = (e) => {
				documentContent = e.target.result;
				resolve({ success: true });
			};

			// Without these handlers a read failure would never settle the promise.
			const reportFailure = () => {
				resolve({ success: false, error: reader.error || null });
			};
			reader.onerror = reportFailure;
			reader.onabort = reportFailure;

			reader.readAsText(file);
		});
	}
	// Enhanced document search with better matching and context
	/**
	 * Finds the passage of the uploaded document that best answers a question.
	 *
	 * Strategies are tried in this order:
	 *   1. a section containing the whole (normalized) question;
	 *   2. for "what is/are ..." questions, a section containing the asked-about term;
	 *   3. for "how to/do/does ..." questions, a section containing the action;
	 *   4. the section with the highest weighted keyword score;
	 *   5. the looser `findAnyRelevant` / `findPossibleSections` fallbacks.
	 *
	 * @param {string} question - The user's question in free text.
	 * @returns {string} An answer excerpt, or a message telling the user what to do next.
	 */
	function searchDocument(question) {
		if (!documentContent) return "Please upload a document first.";

		// Strip trailing punctuation: "what is x?" must still match a section containing "x".
		const questionLower = (typeof question === 'string' ? question : '')
			.toLowerCase()
			.trim()
			.replace(/[\s?!.]+$/, '');
		if (!questionLower) return "Please enter a question.";

		// Split into meaningful units (paragraphs or sections), ignoring blank ones
		const sections = documentContent.split(/\n\s*\n+/).filter((s) => s.trim());

		// The question shape does not change per section, so parse it once up front.
		const definitionMatch = /^what\s+(?:is|are)\s+(.+)$/.exec(questionLower);
		const howMatch = /^how\s+(?:to|do|does)\s+(.+)$/.exec(questionLower);
		const targetTerm = definitionMatch ? definitionMatch[1] : '';
		const action = howMatch ? howMatch[1] : '';

		// NOTE(review): extractInstructions and extractRelevantPart are not defined in the
		// visible part of this script. `typeof` is safe on undeclared names, so use them
		// when they exist and fall back to extractBestAnswer when they do not.
		const instructionsOf = typeof extractInstructions === 'function'
			? extractInstructions
			: extractBestAnswer;
		const relevantPartOf = typeof extractRelevantPart === 'function'
			? extractRelevantPart
			: (text) => extractBestAnswer(text, questionLower);

		// First try exact matches or very close matches
		for (const section of sections) {
			const sectionLower = section.toLowerCase();

			if (sectionLower.includes(questionLower)) {
				return extractBestAnswer(section, questionLower);
			}
			// An empty term would match every section, hence the truthiness guards.
			if (targetTerm && sectionLower.includes(targetTerm)) {
				return extractDefinition(section, targetTerm);
			}
			if (action && sectionLower.includes(action)) {
				return instructionsOf(section, action);
			}
		}

		// Try semantic matching with weighted keywords
		const keywords = extractKeywords(questionLower);
		let bestMatch = { text: '', score: 0 };

		for (const section of sections) {
			const sectionLower = section.toLowerCase();
			// Keywords are { word, weight } objects; search for the word itself.
			// Passing the object would be coerced to "[object Object]" and never match.
			const positions = keywords.map((keyword) => sectionLower.indexOf(keyword.word));
			let score = 0;

			for (let i = 0; i < positions.length; i++) {
				if (positions[i] === -1) continue;
				score += keywords[i].weight;

				// Bonus for every other keyword found within 100 characters
				for (let j = 0; j < positions.length; j++) {
					if (j !== i && positions[j] !== -1 && Math.abs(positions[i] - positions[j]) < 100) {
						score += 5;
					}
				}
			}

			if (score > bestMatch.score) {
				bestMatch = { text: section, score };
			}
		}

		if (bestMatch.score > 0) {
			return relevantPartOf(bestMatch.text, keywords);
		}

		// As last resort, try to find any relevant information
		const relevant = findAnyRelevant(questionLower, sections);
		if (relevant) return relevant;

		return "I found some information that might be relevant, but couldn't find a direct answer to your question. Try asking differently or check these sections:\n" +
			findPossibleSections(questionLower, sections);
	}

	// Helper functions
	/**
	 * Splits a question into weighted keywords used for section scoring.
	 *
	 * Tokens are separated on whitespace, stripped of surrounding punctuation and
	 * de-duplicated. Words of three characters or fewer are skipped, which means the
	 * short entries of the weight table (e.g. 'how', 'is') never apply in practice.
	 *
	 * @param {string} question - Question text; callers in this file pass it lower-cased.
	 * @returns {{word: string, weight: number}[]} Keywords in order of first appearance.
	 */
	function extractKeywords(question) {
		// A Map rather than a plain object: looking up a word such as "constructor" on an
		// object literal would return an inherited property instead of a numeric weight.
		const weights = new Map([
			['what', 1], ['how', 2], ['why', 2], ['when', 2], ['where', 2],
			['is', 1], ['are', 1], ['does', 2], ['do', 1], ['can', 2],
			['explain', 3], ['describe', 3], ['define', 3], ['list', 2],
		]);
		const edgePunctuation = /^[.,;:!?"'()\[\]{}]+|[.,;:!?"'()\[\]{}]+$/g;

		const text = typeof question === 'string' ? question : '';
		const seen = new Set();
		const keywords = [];

		for (const token of text.split(/\s+/)) {
			// "period?" and "period" must count as the same keyword.
			const word = token.replace(edgePunctuation, '');
			if (word.length <= 3 || seen.has(word)) continue;
			seen.add(word);
			keywords.push({ word, weight: weights.get(word) || 1 });
		}

		return keywords;
	}

	/**
	 * Picks the sentence of `text` that contains `question`.
	 *
	 * @param {string} text - Section to search; split into sentences on '.', '!' and '?'.
	 * @param {string} question - Lower-cased phrase to look for inside each sentence.
	 * @returns {string} The first matching sentence followed by '.', or, when no
	 *   sentence matches, the first three sentences joined by spaces followed by '...'.
	 */
	function extractBestAnswer(text, question) {
		// Collect the non-empty, trimmed sentences
		const candidates = [];
		for (const piece of text.split(/[.!?]+/)) {
			const cleaned = piece.trim();
			if (cleaned) candidates.push(cleaned);
		}

		// Earliest sentence mentioning the question wins
		const hit = candidates.find((candidate) => candidate.toLowerCase().includes(question));
		if (hit !== undefined) {
			return `${hit}.`;
		}

		// Nothing matched: give the opening of the section instead
		return `${candidates.slice(0, 3).join(' ')}...`;
	}

	/**
	 * Extracts a definition-like answer for `term` from a section.
	 *
	 * Looks for the first sentence mentioning the term. If that sentence contains the
	 * word "is" or "are" it is returned on its own; otherwise it is returned together
	 * with the following sentence (when there is one) for context.
	 *
	 * @param {string} text - Section to search.
	 * @param {string} term - Term being asked about (matched case-insensitively).
	 * @returns {string} The extracted sentence(s) ending in '.', or the first line of
	 *   `text` when no sentence mentions the term.
	 */
	function extractDefinition(text, term) {
		const sentences = text.split(/[.!?]+/).map((s) => s.trim()).filter(Boolean);
		const needle = term.toLowerCase();
		// Whole words only: a plain substring test would also hit "this", "list", "care"...
		const definitional = /\b(?:is|are)\b/i;

		for (let i = 0; i < sentences.length; i++) {
			const sentence = sentences[i];
			if (!sentence.toLowerCase().includes(needle)) continue;

			const isLast = i === sentences.length - 1;
			// A matching last sentence is still the best answer available, so return it
			// rather than falling through to the unrelated first line.
			if (definitional.test(sentence) || isLast) {
				return sentence + '.';
			}
			// Or combine with next sentence, keeping the sentence break readable
			return sentence + '. ' + sentences[i + 1] + '.';
		}

		return text.split('\n')[0]; // Fallback to first line
	}

	/**
	 * Loose fallback search: returns the opening lines of the first section that
	 * contains at least half of the question's keywords.
	 *
	 * @param {string} question - Lower-cased question text.
	 * @param {string[]} sections - Document sections to scan, in document order.
	 * @returns {string|null} "This might help:" followed by up to three lines of the
	 *   matching section and '...', or null when no section qualifies.
	 */
	function findAnyRelevant(question, sections) {
		const keywords = extractKeywords(question).map((k) => k.word);

		// With no keywords the threshold below would be 0, so the first section would
		// always "match" regardless of its content.
		if (keywords.length === 0) return null;

		for (const section of sections) {
			const sectionLower = section.toLowerCase();
			const matches = keywords.filter((keyword) => sectionLower.includes(keyword)).length;

			if (matches >= keywords.length / 2) {
				return "This might help:\n" + section.split('\n').slice(0, 3).join('\n') + '...';
			}
		}

		return null;
	}

	/**
	 * Lists the headings (first lines) of sections that mention any keyword of the question.
	 *
	 * @param {string} question - Lower-cased question text.
	 * @param {string[]} sections - Document sections to scan, in document order.
	 * @returns {string} Up to three distinct, non-empty first lines, each prefixed with
	 *   "• " and separated by newlines; '' when nothing matches.
	 */
	function findPossibleSections(question, sections) {
		const terms = extractKeywords(question).map(({ word }) => word);

		const headings = sections
			// keep sections mentioning at least one keyword
			.filter((section) => {
				const lowered = section.toLowerCase();
				return terms.some((term) => lowered.includes(term));
			})
			// a section is represented by its first line
			.map((section) => section.split('\n')[0])
			// drop empty headings and repeats, keeping the first occurrence
			.filter((heading, position, all) => heading && all.indexOf(heading) === position);

		return headings
			.slice(0, 3)
			.map((heading) => `• ${heading}`)
			.join('\n');
	}
	/**
	 * Answers a question about the uploaded document.
	 *
	 * Delegates to `searchDocument`, then, when the answer is a single sentence
	 * containing the word "is", appends the sentence that follows it in the document
	 * to give a little more context.
	 *
	 * @param {string} question - The user's question.
	 * @returns {Promise<{answer: string}>} The (possibly extended) answer.
	 */
	async function askQuestion(question) {
		if (!documentContent) {
			return { answer: "Please upload a document first." };
		}

		const answer = searchDocument(question);
		if (typeof answer !== 'string') return { answer };

		// Compare on the sentence text without its terminator: the document is split on
		// punctuation below, so its sentences never carry a trailing '.'.
		const trimmed = answer.trim();
		const core = trimmed.replace(/[.!?]+$/, '').trim();
		const isSingleSentence = core !== '' && !/[.!?]/.test(core);
		if (!isSingleSentence || !/\bis\b/i.test(core)) {
			return { answer };
		}

		// Try to find the next sentence for more context
		const sentences = documentContent.split(/[.!?]+/).map((s) => s.trim());
		const idx = sentences.indexOf(core);
		const next = idx === -1 ? '' : sentences[idx + 1];
		if (!next) {
			return { answer };
		}

		const terminated = /[.!?]$/.test(trimmed) ? trimmed : `${trimmed}.`;
		return { answer: `${terminated} ${next}.` };
	}