GitHub Actions committed on
Commit
e651eb1
·
1 Parent(s): 767c4e6

Deploy 302f7a5

Browse files
app/pipeline/nodes/generate.py CHANGED
@@ -66,16 +66,17 @@ ANSWERING RULES — follow all of them every time:
66
  say it, do not say it — not even if you "know" it from training data.
67
  3. READ ALL PASSAGES. An answer may be spread across multiple passages — a blog intro
68
  in [1], technical details in [3], project context in [5]. Synthesise all relevant
69
- passages into one cohesive answer rather than stopping at the first match.
70
  4. SCOPE. Use passages that directly address the question AND adjacent passages that
71
  provide supporting context, background, or related facts. If multiple passages
72
  contain information relevant to the query, you must cite all of them — do not
73
  cite only the first relevant passage and ignore others. A response about work
74
  experience that draws from one resume chunk must also cite any other resume chunk
75
  that adds detail.
76
- 5. Cite every claim immediately after it with [N] where N is the passage number.
77
- Example: "He optimised inference to 60 fps [1] by quantising the model [3]."
78
- When a claim is backed by multiple passages, cite all: "He uses Python [1][4]."
 
79
  6. If relevant passages contain limited facts, give a short answer covering exactly
80
  those facts — a short confident answer beats a padded hallucinated one.
81
  7. Vary your sentence openers. Never start two consecutive sentences with "Darshan".
@@ -324,7 +325,7 @@ def _normalise_answer_text(answer: str, max_citation_index: int) -> str:
324
 
325
  cleaned = _GEN_HTML_TAG_RE.sub("", answer)
326
  cleaned = re.sub(r"\[(\d+)\]", _keep_valid_citation, cleaned)
327
- cleaned = re.sub(r"(\[\d+\])(\1)+", r"\1", cleaned)
328
  cleaned = re.sub(r"\s+([,.;:!?])", r"\1", cleaned)
329
  cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
330
  return cleaned.strip()
 
66
  say it, do not say it — not even if you "know" it from training data.
67
  3. READ ALL PASSAGES. An answer may be spread across multiple passages — a blog intro
68
  in [1], technical details in [3], project context in [5]. Synthesise all relevant
69
+ passages into one cohesive answer rather than stopping at the first match. Prioritise using varied sources (e.g., combining Resume with Project passages) to give a well-rounded answer.
70
  4. SCOPE. Use passages that directly address the question AND adjacent passages that
71
  provide supporting context, background, or related facts. If multiple passages
72
  contain information relevant to the query, you must cite all of them — do not
73
  cite only the first relevant passage and ignore others. A response about work
74
  experience that draws from one resume chunk must also cite any other resume chunk
75
  that adds detail.
76
+ 5. Cite at the end of the sentence or clause, not after every single item in a list.
77
+ Example: "He uses Python, Kotlin, and C++ [1][4]."
78
+ Do NOT cite like this: "He uses Python [1], Kotlin [1], and C++ [1]."
79
+ When a claim is backed by multiple passages, cite all: "[1][4]".
80
  6. If relevant passages contain limited facts, give a short answer covering exactly
81
  those facts — a short confident answer beats a padded hallucinated one.
82
  7. Vary your sentence openers. Never start two consecutive sentences with "Darshan".
 
325
 
326
  cleaned = _GEN_HTML_TAG_RE.sub("", answer)
327
  cleaned = re.sub(r"\[(\d+)\]", _keep_valid_citation, cleaned)
328
+ cleaned = re.sub(r"(\[\d+\])(?:\s*\1)+", r"\1", cleaned)
329
  cleaned = re.sub(r"\s+([,.;:!?])", r"\1", cleaned)
330
  cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
331
  return cleaned.strip()
app/pipeline/nodes/retrieve.py CHANGED
@@ -64,9 +64,7 @@ _FOCUS_KEYWORDS: dict[frozenset[str], str] = {
64
  frozenset({"experience", "work", "job", "role", "career", "internship",
65
  "skills", "skill", "education", "degree", "university",
66
  "certification", "certifications", "qualified", "resume", "cv",
67
- "employment", "professional", "placement", "history",
68
- "tech", "stack", "technology", "technologies", "framework",
69
- "frameworks", "tool", "tools", "tooling", "language", "languages"}): "cv",
70
  frozenset({"project", "built", "build", "developed", "architecture",
71
  "system", "platform", "app", "application"}): "project",
72
  frozenset({"blog", "post", "article", "wrote", "writing", "published"}): "blog",
 
64
  frozenset({"experience", "work", "job", "role", "career", "internship",
65
  "skills", "skill", "education", "degree", "university",
66
  "certification", "certifications", "qualified", "resume", "cv",
67
+ "employment", "professional", "placement", "history"}): "cv",
 
 
68
  frozenset({"project", "built", "build", "developed", "architecture",
69
  "system", "platform", "app", "application"}): "project",
70
  frozenset({"blog", "post", "article", "wrote", "writing", "published"}): "blog",