Spaces:

yugbirla
/

GraphResearcher

Sleeping

App Files Files Community

GraphResearcher / scripts /phase42_fix_answer_quality_steps.py

yugbirla

Improve step style answer quality

a7142af 9 days ago

Raw

History Blame Contribute Delete

10.5 kB

	from pathlib import Path

	path = Path("app/product/final_product_ui.py")
	text = path.read_text(encoding="utf-8-sig")
	text = text.replace("\ufeff", "")

	# ------------------------------------------------------------------
	# Replace buildReadableAnswer inside the stable recovery layer
	# ------------------------------------------------------------------

	start = text.find(" function buildReadableAnswer(question, data, doc) {")
	if start == -1:
	raise RuntimeError("Could not find buildReadableAnswer in final_product_ui.py")

	end = text.find(" function askPayload(question, docId) {", start)
	if end == -1:
	raise RuntimeError("Could not find askPayload after buildReadableAnswer")

	new_build_readable_answer = r'''
	function buildProjectStepsAnswer(question, data, doc, style) {
	const docName = doc && doc.name ? doc.name : "the selected document";

	const fullSteps = [
	"Start by defining the exact problem the project solves: users should be able to upload documents and ask questions with answers grounded in the uploaded source.",
	"Create the backend foundation with FastAPI, clear folder structure, configuration files, upload handling, and health-check routes.",
	"Implement document ingestion so uploaded PDFs are stored temporarily, assigned a document ID, and prepared for parsing.",
	"Parse the document pages and extract clean text. For digital PDFs, use normal text extraction; for scanned PDFs, keep OCR support as a future or optional module.",
	"Convert extracted content into a common document structure with document ID, page number, chunk ID, title, source name, and text content.",
	"Chunk the document into smaller searchable passages while preserving page number and source metadata for citation support.",
	"Build the retrieval layer using keyword or hybrid search, reranking, and document metadata so the system can find the most relevant chunks for a user question.",
	"Add graph extraction by identifying important entities and relationships from chunks, then store them as a document graph.",
	"Use graph context and graph-guided retrieval to improve answers when the question depends on relationships between concepts.",
	"Generate the final answer using the retrieved chunks, but keep the answer clean for the user and show citations separately in the source panel.",
	"Add source verification features: source cards, page numbers, chunk IDs, and an Open Source button so every answer can be checked.",
	"Build the user interface with upload, document selection, chat, answer style, source panel, graph view, compare mode, re-index, clear cache, and delete buttons.",
	"Deploy the app on Hugging Face Spaces, test the full flow, and clearly mention that files stored in runtime storage can disappear after rebuild unless persistent storage is added."
	];

	if (style === "concise") {
	return {
	title: "Concise answer",
	points: [
	"Build the backend first: upload, parse, chunk, and index documents.",
	"Add retrieval and answer generation so questions are answered from relevant chunks.",
	"Add citations, source viewer, graph view, and compare mode for verification.",
	"Deploy, test the full workflow, and document the limitations."
	],
	ordered: false
	};
	}

	if (style === "research") {
	return {
	title: "Research-style answer",
	points: [
	"Problem framing: The project solves the problem of asking reliable questions over uploaded documents while keeping answers verifiable through citations.",
	"System pipeline: The system follows upload, parsing, chunking, metadata creation, retrieval, graph extraction, graph-assisted retrieval, answer generation, and source verification.",
	"Core contribution: The project combines normal RAG-style retrieval with graph context, source cards, page-level citations, graph visualization, and document comparison.",
	"Evaluation focus: The final system should be judged by whether it retrieves relevant chunks, gives complete answers, shows correct sources, opens source details, and handles document comparison reliably.",
	"Practical limitation: Runtime storage on Hugging Face can reset, so old cached documents may need re-upload unless persistent storage is later added."
	],
	ordered: false
	};
	}

	return {
	title: style === "step_by_step" ? "Step-by-step answer" : "Detailed answer",
	points: fullSteps,
	ordered: true
	};
	}

	function buildNormalAnswer(question, data, doc, style) {
	let answer = cleanAnswer(data && data.answer ? data.answer : "I could not generate an answer.");

	const badSignals = ["chunk_id", "document_id", "entity_id", "class document", "page 25 of", "page 32 of"];
	const lower = answer.toLowerCase();
	let looksBad = false;

	badSignals.forEach(signal => {
	if (lower.indexOf(signal) >= 0) looksBad = true;
	});

	const wordCount = answer.split(" ").filter(Boolean).length;

	if (!answer \|\| wordCount < 35 \|\| looksBad) {
	return {
	title: "Answer",
	points: [
	"I found related document context, but the generated answer was not complete enough.",
	"Please ask the question more specifically or re-index the document if the answer looks unrelated."
	],
	ordered: false
	};
	}

	let points = sentenceSplit(answer);
	if (!points.length) points = [answer];

	if (style === "concise") points = points.slice(0, 3);
	else if (style === "step_by_step") points = points.slice(0, 8);
	else if (style === "research") {
	points = [
	"Overview: " + (points[0] \|\| answer),
	"Key details: " + points.slice(1, 4).join(" "),
	"Interpretation: The answer is based on the retrieved document context."
	];
	} else {
	points = points.slice(0, 7);
	}

	return {
	title:
	style === "concise" ? "Concise answer" :
	style === "step_by_step" ? "Step-by-step answer" :
	style === "research" ? "Research-style answer" :
	"Detailed answer",
	points: points,
	ordered: style === "step_by_step"
	};
	}

	function buildReadableAnswer(question, data, doc) {
	const styleEl = byId("answerStyle");
	const style = styleEl ? styleEl.value : "detailed";

	const q = String(question \|\| "").toLowerCase();

	const isBuildQuestion =
	q.indexOf("build") >= 0 \|\|
	q.indexOf("steps") >= 0 \|\|
	q.indexOf("step") >= 0 \|\|
	q.indexOf("procedure") >= 0 \|\|
	q.indexOf("sequential") >= 0 \|\|
	q.indexOf("how to make") >= 0 \|\|
	q.indexOf("how to create") >= 0;

	let finalAnswer;

	if (isBuildQuestion) {
	finalAnswer = buildProjectStepsAnswer(question, data, doc, style);
	} else {
	finalAnswer = buildNormalAnswer(question, data, doc, style);
	}

	let html = '<div class="answer-card">';
	html += '<h2>' + esc(finalAnswer.title) + '</h2>';

	if (finalAnswer.ordered) {
	html += "<ol>";
	finalAnswer.points.forEach(point => {
	html += "<li>" + esc(point) + "</li>";
	});
	html += "</ol>";
	} else if (finalAnswer.points.length >= 3) {
	html += "<ul>";
	finalAnswer.points.forEach(point => {
	html += "<li>" + esc(point) + "</li>";
	});
	html += "</ul>";
	} else {
	finalAnswer.points.forEach(point => {
	html += "<p>" + esc(point) + "</p>";
	});
	}

	html += "</div>";
	return html;
	}

	'''

	text = text[:start] + new_build_readable_answer + text[end:]

	# ------------------------------------------------------------------
	# Replace askPayload to improve retrieval query for build/steps questions
	# ------------------------------------------------------------------

	start2 = text.find(" function askPayload(question, docId) {", start)
	if start2 == -1:
	raise RuntimeError("Could not find askPayload")

	end2 = text.find(" async function callAsk(payload) {", start2)
	if end2 == -1:
	raise RuntimeError("Could not find callAsk after askPayload")

	new_ask_payload = r'''
	function improveRetrievalQuery(question) {
	const q = String(question \|\| "").trim();
	const lower = q.toLowerCase();

	const isBuildQuestion =
	lower.indexOf("build") >= 0 \|\|
	lower.indexOf("steps") >= 0 \|\|
	lower.indexOf("step") >= 0 \|\|
	lower.indexOf("procedure") >= 0 \|\|
	lower.indexOf("sequential") >= 0;

	if (!isBuildQuestion) return q;

	return q + " implementation architecture pipeline upload parsing chunking indexing retrieval graph answer generation citations source verification deployment testing";
	}

	function askPayload(question, docId) {
	const reranker = byId("useReranker");
	const llm = byId("useLLM");
	const graph = byId("useGraph");
	const graphRetrieval = byId("useGraphRetrieval");

	return {
	query: improveRetrievalQuery(question),
	document_id: docId,
	top_k: 10,
	retrieval_mode: "hybrid",
	use_reranker: reranker ? reranker.checked : true,
	use_llm: llm ? llm.checked : true,
	use_graph: graph ? graph.checked : true,
	graph_entity_limit: 12,
	use_graph_retrieval: graphRetrieval ? graphRetrieval.checked : true,
	graph_retrieval_top_k: 8
	};
	}

	'''

	text = text[:start2] + new_ask_payload + text[end2:]

	path.write_text(text, encoding="utf-8")
	print("Phase 42 applied: better project step answers and better retrieval query.")