Spaces:
Running
Running
GitHub Actions committed on
Commit ·
e651eb1
1
Parent(s): 767c4e6
Deploy 302f7a5
Browse files
app/pipeline/nodes/generate.py
CHANGED
|
@@ -66,16 +66,17 @@ ANSWERING RULES — follow all of them every time:
|
|
| 66 |
say it, do not say it — not even if you "know" it from training data.
|
| 67 |
3. READ ALL PASSAGES. An answer may be spread across multiple passages — a blog intro
|
| 68 |
in [1], technical details in [3], project context in [5]. Synthesise all relevant
|
| 69 |
-
passages into one cohesive answer rather than stopping at the first match.
|
| 70 |
4. SCOPE. Use passages that directly address the question AND adjacent passages that
|
| 71 |
provide supporting context, background, or related facts. If multiple passages
|
| 72 |
contain information relevant to the query, you must cite all of them — do not
|
| 73 |
cite only the first relevant passage and ignore others. A response about work
|
| 74 |
experience that draws from one resume chunk must also cite any other resume chunk
|
| 75 |
that adds detail.
|
| 76 |
-
5. Cite
|
| 77 |
-
Example: "He
|
| 78 |
-
|
|
|
|
| 79 |
6. If relevant passages contain limited facts, give a short answer covering exactly
|
| 80 |
those facts — a short confident answer beats a padded hallucinated one.
|
| 81 |
7. Vary your sentence openers. Never start two consecutive sentences with "Darshan".
|
|
@@ -324,7 +325,7 @@ def _normalise_answer_text(answer: str, max_citation_index: int) -> str:
|
|
| 324 |
|
| 325 |
cleaned = _GEN_HTML_TAG_RE.sub("", answer)
|
| 326 |
cleaned = re.sub(r"\[(\d+)\]", _keep_valid_citation, cleaned)
|
| 327 |
-
cleaned = re.sub(r"(\[\d+\])(\1)+", r"\1", cleaned)
|
| 328 |
cleaned = re.sub(r"\s+([,.;:!?])", r"\1", cleaned)
|
| 329 |
cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
|
| 330 |
return cleaned.strip()
|
|
|
|
| 66 |
say it, do not say it — not even if you "know" it from training data.
|
| 67 |
3. READ ALL PASSAGES. An answer may be spread across multiple passages — a blog intro
|
| 68 |
in [1], technical details in [3], project context in [5]. Synthesise all relevant
|
| 69 |
+
passages into one cohesive answer rather than stopping at the first match. Prioritise using varied sources (e.g., combining Resume with Project passages) to give a well-rounded answer.
|
| 70 |
4. SCOPE. Use passages that directly address the question AND adjacent passages that
|
| 71 |
provide supporting context, background, or related facts. If multiple passages
|
| 72 |
contain information relevant to the query, you must cite all of them — do not
|
| 73 |
cite only the first relevant passage and ignore others. A response about work
|
| 74 |
experience that draws from one resume chunk must also cite any other resume chunk
|
| 75 |
that adds detail.
|
| 76 |
+
5. Cite at the end of the sentence or clause, not after every single item in a list.
|
| 77 |
+
Example: "He uses Python, Kotlin, and C++ [1][4]."
|
| 78 |
+
Do NOT cite like this: "He uses Python [1], Kotlin [1], and C++ [1]."
|
| 79 |
+
When a claim is backed by multiple passages, cite all: "[1][4]".
|
| 80 |
6. If relevant passages contain limited facts, give a short answer covering exactly
|
| 81 |
those facts — a short confident answer beats a padded hallucinated one.
|
| 82 |
7. Vary your sentence openers. Never start two consecutive sentences with "Darshan".
|
|
|
|
| 325 |
|
| 326 |
cleaned = _GEN_HTML_TAG_RE.sub("", answer)
|
| 327 |
cleaned = re.sub(r"\[(\d+)\]", _keep_valid_citation, cleaned)
|
| 328 |
+
cleaned = re.sub(r"(\[\d+\])(?:\s*\1)+", r"\1", cleaned)
|
| 329 |
cleaned = re.sub(r"\s+([,.;:!?])", r"\1", cleaned)
|
| 330 |
cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
|
| 331 |
return cleaned.strip()
|
app/pipeline/nodes/retrieve.py
CHANGED
|
@@ -64,9 +64,7 @@ _FOCUS_KEYWORDS: dict[frozenset[str], str] = {
|
|
| 64 |
frozenset({"experience", "work", "job", "role", "career", "internship",
|
| 65 |
"skills", "skill", "education", "degree", "university",
|
| 66 |
"certification", "certifications", "qualified", "resume", "cv",
|
| 67 |
-
"employment", "professional", "placement", "history",
|
| 68 |
-
"tech", "stack", "technology", "technologies", "framework",
|
| 69 |
-
"frameworks", "tool", "tools", "tooling", "language", "languages"}): "cv",
|
| 70 |
frozenset({"project", "built", "build", "developed", "architecture",
|
| 71 |
"system", "platform", "app", "application"}): "project",
|
| 72 |
frozenset({"blog", "post", "article", "wrote", "writing", "published"}): "blog",
|
|
|
|
| 64 |
frozenset({"experience", "work", "job", "role", "career", "internship",
|
| 65 |
"skills", "skill", "education", "degree", "university",
|
| 66 |
"certification", "certifications", "qualified", "resume", "cv",
|
| 67 |
+
"employment", "professional", "placement", "history"}): "cv",
|
|
|
|
|
|
|
| 68 |
frozenset({"project", "built", "build", "developed", "architecture",
|
| 69 |
"system", "platform", "app", "application"}): "project",
|
| 70 |
frozenset({"blog", "post", "article", "wrote", "writing", "published"}): "blog",
|