# Extracted from a hosted file view (Spaces status: Running; file size: 10,461 bytes; revision a7142af).
from pathlib import Path
path = Path("app/product/final_product_ui.py")
# Decode as utf-8-sig, then remove any U+FEFF characters still present in the
# decoded text.
text = path.read_text(encoding="utf-8-sig").replace("\ufeff", "")

# ------------------------------------------------------------------
# Replace buildReadableAnswer inside the stable recovery layer
# ------------------------------------------------------------------
# [start, end) runs from the buildReadableAnswer marker up to (not including)
# the first askPayload marker found at or after it.
start = text.find(" function buildReadableAnswer(question, data, doc) {")
if start < 0:
    raise RuntimeError("Could not find buildReadableAnswer in final_product_ui.py")

end = text.find(" function askPayload(question, docId) {", start)
if end < 0:
    raise RuntimeError("Could not find askPayload after buildReadableAnswer")
new_build_readable_answer = r'''
/**
 * Build the fixed "how this project is built" answer for the requested style.
 *
 * The text is hard-coded. `question`, `data`, and `doc` are not read; they are
 * accepted so this function can be called with the same arguments as
 * buildNormalAnswer.
 *
 * @param {string} question - The user's question (not read).
 * @param {object} data - The ask response (not read).
 * @param {object} doc - The selected document (not read).
 * @param {string} style - "concise", "research", "step_by_step", or any other
 *   value for the detailed variant.
 * @returns {{title: string, points: string[], ordered: boolean}} Title, the
 *   list of points, and whether the points should be rendered as a numbered list.
 */
function buildProjectStepsAnswer(question, data, doc, style) {
  if (style === "concise") {
    return {
      title: "Concise answer",
      points: [
        "Build the backend first: upload, parse, chunk, and index documents.",
        "Add retrieval and answer generation so questions are answered from relevant chunks.",
        "Add citations, source viewer, graph view, and compare mode for verification.",
        "Deploy, test the full workflow, and document the limitations."
      ],
      ordered: false
    };
  }

  if (style === "research") {
    return {
      title: "Research-style answer",
      points: [
        "Problem framing: The project solves the problem of asking reliable questions over uploaded documents while keeping answers verifiable through citations.",
        "System pipeline: The system follows upload, parsing, chunking, metadata creation, retrieval, graph extraction, graph-assisted retrieval, answer generation, and source verification.",
        "Core contribution: The project combines normal RAG-style retrieval with graph context, source cards, page-level citations, graph visualization, and document comparison.",
        "Evaluation focus: The final system should be judged by whether it retrieves relevant chunks, gives complete answers, shows correct sources, opens source details, and handles document comparison reliably.",
        "Practical limitation: Runtime storage on Hugging Face can reset, so old cached documents may need re-upload unless persistent storage is later added."
      ],
      ordered: false
    };
  }

  // Only the step-by-step and detailed variants use the full list, so it is
  // built after the early returns above.
  const fullSteps = [
    "Start by defining the exact problem the project solves: users should be able to upload documents and ask questions with answers grounded in the uploaded source.",
    "Create the backend foundation with FastAPI, clear folder structure, configuration files, upload handling, and health-check routes.",
    "Implement document ingestion so uploaded PDFs are stored temporarily, assigned a document ID, and prepared for parsing.",
    "Parse the document pages and extract clean text. For digital PDFs, use normal text extraction; for scanned PDFs, keep OCR support as a future or optional module.",
    "Convert extracted content into a common document structure with document ID, page number, chunk ID, title, source name, and text content.",
    "Chunk the document into smaller searchable passages while preserving page number and source metadata for citation support.",
    "Build the retrieval layer using keyword or hybrid search, reranking, and document metadata so the system can find the most relevant chunks for a user question.",
    "Add graph extraction by identifying important entities and relationships from chunks, then store them as a document graph.",
    "Use graph context and graph-guided retrieval to improve answers when the question depends on relationships between concepts.",
    "Generate the final answer using the retrieved chunks, but keep the answer clean for the user and show citations separately in the source panel.",
    "Add source verification features: source cards, page numbers, chunk IDs, and an Open Source button so every answer can be checked.",
    "Build the user interface with upload, document selection, chat, answer style, source panel, graph view, compare mode, re-index, clear cache, and delete buttons.",
    "Deploy the app on Hugging Face Spaces, test the full flow, and clearly mention that files stored in runtime storage can disappear after rebuild unless persistent storage is added."
  ];

  return {
    title: style === "step_by_step" ? "Step-by-step answer" : "Detailed answer",
    points: fullSteps,
    ordered: true
  };
}
/**
 * Turn the generated answer in `data.answer` into a titled list of points
 * shaped for the requested answer style.
 *
 * A fixed fallback message is returned when the cleaned answer is empty, has
 * fewer than 35 space-separated words, or contains one of the marker strings
 * listed in `badSignals`.
 *
 * @param {string} question - The user's question (not read).
 * @param {object} data - Ask response; only `data.answer` is read.
 * @param {object} doc - The selected document (not read).
 * @param {string} style - "concise", "step_by_step", "research", or any other
 *   value for the detailed variant.
 * @returns {{title: string, points: string[], ordered: boolean}} Title, the
 *   list of points, and whether the points should be rendered as a numbered list.
 */
function buildNormalAnswer(question, data, doc, style) {
  const rawAnswer = data && data.answer ? data.answer : "I could not generate an answer.";
  // Coerce to a string so the string calls below cannot throw if cleanAnswer
  // hands back null/undefined; an empty result then reaches the fallback.
  const answer = String(cleanAnswer(rawAnswer) || "");

  const badSignals = ["chunk_id", "document_id", "entity_id", "class document", "page 25 of", "page 32 of"];
  const lower = answer.toLowerCase();
  const looksBad = badSignals.some(signal => lower.includes(signal));
  const wordCount = answer.split(" ").filter(Boolean).length;

  if (!answer || wordCount < 35 || looksBad) {
    return {
      title: "Answer",
      points: [
        "I found related document context, but the generated answer was not complete enough.",
        "Please ask the question more specifically or re-index the document if the answer looks unrelated."
      ],
      ordered: false
    };
  }

  let sentences = sentenceSplit(answer);
  if (!sentences.length) sentences = [answer];

  let title = "Detailed answer";
  let points;
  if (style === "concise") {
    title = "Concise answer";
    points = sentences.slice(0, 3);
  } else if (style === "step_by_step") {
    title = "Step-by-step answer";
    points = sentences.slice(0, 8);
  } else if (style === "research") {
    title = "Research-style answer";
    const details = sentences.slice(1, 4).join(" ");
    points = ["Overview: " + (sentences[0] || answer)];
    // With a single-sentence answer there is nothing to put here; skip the
    // bullet instead of emitting an empty "Key details: " line.
    if (details) points.push("Key details: " + details);
    points.push("Interpretation: The answer is based on the retrieved document context.");
  } else {
    points = sentences.slice(0, 7);
  }

  return {
    title: title,
    points: points,
    ordered: style === "step_by_step"
  };
}
/**
 * Render the answer for a question as an HTML "answer-card" fragment.
 *
 * The style comes from the element with id "answerStyle" (looked up through
 * byId), falling back to "detailed" when no element is returned. Questions
 * containing one of the build/steps keywords are answered by
 * buildProjectStepsAnswer; all others by buildNormalAnswer. The title and
 * every point are passed through esc before being placed in the markup.
 *
 * @param {string} question - The user's question.
 * @param {object} data - Ask response, forwarded to the answer builder.
 * @param {object} doc - The selected document, forwarded to the answer builder.
 * @returns {string} HTML string: ordered list when the answer is ordered,
 *   bullet list for three or more points, otherwise one paragraph per point.
 */
function buildReadableAnswer(question, data, doc) {
  const styleSelect = byId("answerStyle");
  const style = styleSelect ? styleSelect.value : "detailed";

  const loweredQuestion = String(question || "").toLowerCase();
  const buildKeywords = [
    "build",
    "steps",
    "step",
    "procedure",
    "sequential",
    "how to make",
    "how to create"
  ];
  const isBuildQuestion = buildKeywords.some(keyword => loweredQuestion.indexOf(keyword) >= 0);

  const builder = isBuildQuestion ? buildProjectStepsAnswer : buildNormalAnswer;
  const finalAnswer = builder(question, data, doc, style);

  const pieces = ['<div class="answer-card">', '<h2>' + esc(finalAnswer.title) + '</h2>'];

  // Pick the wrapper: numbered list, bullet list, or no wrapper (paragraphs).
  let listTag = "";
  if (finalAnswer.ordered) {
    listTag = "ol";
  } else if (finalAnswer.points.length >= 3) {
    listTag = "ul";
  }
  const itemTag = listTag ? "li" : "p";

  if (listTag) pieces.push("<" + listTag + ">");
  finalAnswer.points.forEach(point => {
    pieces.push("<" + itemTag + ">" + esc(point) + "</" + itemTag + ">");
  });
  if (listTag) pieces.push("</" + listTag + ">");

  pieces.push("</div>");
  return pieces.join("");
}
'''
# Splice the replacement functions over the old buildReadableAnswer region.
text = "".join((text[:start], new_build_readable_answer, text[end:]))

# ------------------------------------------------------------------
# Replace askPayload to improve retrieval query for build/steps questions
# ------------------------------------------------------------------
# Searching from `start` means only text at or after the splice point is
# considered when looking for the askPayload marker.
start2 = text.find(" function askPayload(question, docId) {", start)
if start2 < 0:
    raise RuntimeError("Could not find askPayload")

end2 = text.find(" async function callAsk(payload) {", start2)
if end2 < 0:
    raise RuntimeError("Could not find callAsk after askPayload")
new_ask_payload = r'''
/**
 * Expand build/steps-style questions with extra retrieval keywords.
 *
 * The keyword list covers the same questions that buildReadableAnswer treats
 * as build questions, so a question that receives the project-steps answer
 * also receives the expanded retrieval query.
 *
 * @param {string} question - The user's question; null/undefined is treated as "".
 * @returns {string} The trimmed question, with the keyword suffix appended
 *   when it looks like a build/steps question.
 */
function improveRetrievalQuery(question) {
  const trimmed = String(question || "").trim();
  const lower = trimmed.toLowerCase();

  // "step" also matches "steps", so a separate "steps" entry is not needed.
  const buildKeywords = [
    "build",
    "step",
    "procedure",
    "sequential",
    "how to make",
    "how to create"
  ];
  const isBuildQuestion = buildKeywords.some(keyword => lower.includes(keyword));
  if (!isBuildQuestion) return trimmed;

  return trimmed + " implementation architecture pipeline upload parsing chunking indexing retrieval graph answer generation citations source verification deployment testing";
}
/**
 * Assemble the request payload for asking a question about one document.
 *
 * Each boolean option mirrors the `checked` state of its checkbox (looked up
 * through byId) and defaults to true when no element is returned. The query
 * is the question after it has been passed through improveRetrievalQuery.
 *
 * @param {string} question - The user's question.
 * @param {string} docId - Value sent as `document_id`.
 * @returns {object} Payload with query, document id, retrieval settings and toggles.
 */
function askPayload(question, docId) {
  // Checkbox state when the element exists, otherwise enabled by default.
  const isEnabled = (elementId) => {
    const element = byId(elementId);
    return element ? element.checked : true;
  };

  const useReranker = isEnabled("useReranker");
  const useLlm = isEnabled("useLLM");
  const useGraph = isEnabled("useGraph");
  const useGraphRetrieval = isEnabled("useGraphRetrieval");

  return {
    query: improveRetrievalQuery(question),
    document_id: docId,
    top_k: 10,
    retrieval_mode: "hybrid",
    use_reranker: useReranker,
    use_llm: useLlm,
    use_graph: useGraph,
    graph_entity_limit: 12,
    use_graph_retrieval: useGraphRetrieval,
    graph_retrieval_top_k: 8
  };
}
'''
# Swap the old askPayload region for the new helper + askPayload, then write
# the patched text back to the same file.
text = "".join((text[:start2], new_ask_payload, text[end2:]))
path.write_text(text, encoding="utf-8")
print("Phase 42 applied: better project step answers and better retrieval query.")