Spaces:
Running
Running
GitHub Actions committed on
Commit ·
7664072
1
Parent(s): eca3a47
Deploy 13e6fc0
Browse files- app/pipeline/nodes/generate.py +12 -26
- app/pipeline/nodes/retrieve.py +4 -3
app/pipeline/nodes/generate.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
from typing import Callable
|
| 2 |
-
import re
|
| 3 |
|
| 4 |
from app.models.pipeline import PipelineState
|
| 5 |
from app.models.chat import SourceRef
|
|
@@ -40,12 +39,14 @@ def make_generate_node(llm_client: LLMClient) -> Callable[[PipelineState], dict]
|
|
| 40 |
context_block = "\n\n".join(context_parts)
|
| 41 |
|
| 42 |
system_prompt = (
|
| 43 |
-
"You are Darshan's personal assistant
|
| 44 |
-
"
|
| 45 |
-
"
|
|
|
|
|
|
|
| 46 |
)
|
| 47 |
-
|
| 48 |
-
prompt = f"Context:\n{context_block}\n\
|
| 49 |
|
| 50 |
# Complete via the requested streams
|
| 51 |
stream = llm_client.complete_with_complexity(prompt=prompt, system=system_prompt, stream=True, complexity=complexity)
|
|
@@ -53,28 +54,13 @@ def make_generate_node(llm_client: LLMClient) -> Callable[[PipelineState], dict]
|
|
| 53 |
full_answer = ""
|
| 54 |
async for chunk in stream:
|
| 55 |
full_answer += chunk
|
| 56 |
-
|
| 57 |
-
#
|
| 58 |
-
#
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
# Regex to find all [Text](URL)
|
| 62 |
-
matches = re.findall(r"\[([^\]]+)\]\(([^)]+)\)", full_answer)
|
| 63 |
-
# Find which of our source refs match the URL
|
| 64 |
-
url_to_ref = {ref.url: ref for ref in source_refs}
|
| 65 |
-
|
| 66 |
-
for title, url in matches:
|
| 67 |
-
if url in url_to_ref:
|
| 68 |
-
if url_to_ref[url] not in mentioned_refs:
|
| 69 |
-
mentioned_refs.append(url_to_ref[url])
|
| 70 |
-
|
| 71 |
-
# Fallback: if no specific inline citations were used but we have sources,
|
| 72 |
-
# we can attach all provided sources, or strictly just the mentioned ones.
|
| 73 |
-
# "extracts source refs mentioned in response" -> we return `mentioned_refs`
|
| 74 |
-
|
| 75 |
return {
|
| 76 |
"answer": full_answer,
|
| 77 |
-
"sources":
|
| 78 |
}
|
| 79 |
|
| 80 |
return generate_node
|
|
|
|
| 1 |
from typing import Callable
|
|
|
|
| 2 |
|
| 3 |
from app.models.pipeline import PipelineState
|
| 4 |
from app.models.chat import SourceRef
|
|
|
|
| 39 |
context_block = "\n\n".join(context_parts)
|
| 40 |
|
| 41 |
system_prompt = (
|
| 42 |
+
"You are Darshan Chheda's personal AI assistant embedded on his portfolio. "
|
| 43 |
+
"Answer questions using ONLY the numbered context passages below. "
|
| 44 |
+
"Cite sources inline using bracketed numbers like [1], [2] immediately after each claim. "
|
| 45 |
+
"Be concise, confident, and factual. Never invent details not present in the context. "
|
| 46 |
+
"If the context doesn't contain enough information to answer fully, say so honestly."
|
| 47 |
)
|
| 48 |
+
|
| 49 |
+
prompt = f"Context:\n{context_block}\n\nQuestion: {query}"
|
| 50 |
|
| 51 |
# Complete via the requested streams
|
| 52 |
stream = llm_client.complete_with_complexity(prompt=prompt, system=system_prompt, stream=True, complexity=complexity)
|
|
|
|
| 54 |
full_answer = ""
|
| 55 |
async for chunk in stream:
|
| 56 |
full_answer += chunk
|
| 57 |
+
|
| 58 |
+
# Always return all source refs used as context — the LLM is instructed
|
| 59 |
+
# to cite inline as [N], so every chunk in context is a potential citation.
|
| 60 |
+
# Filtering by regex is fragile; the frontend renders all sources as footnotes.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
return {
|
| 62 |
"answer": full_answer,
|
| 63 |
+
"sources": source_refs
|
| 64 |
}
|
| 65 |
|
| 66 |
return generate_node
|
app/pipeline/nodes/retrieve.py
CHANGED
|
@@ -30,10 +30,11 @@ def make_retrieve_node(vector_store: VectorStore, embedder: Embedder, reranker:
|
|
| 30 |
# Reranker is async — must be awaited.
|
| 31 |
reranked = await reranker.rerank(state["query"], unique_chunks, top_k=5)
|
| 32 |
|
| 33 |
-
#
|
| 34 |
-
|
|
|
|
| 35 |
return {
|
| 36 |
-
"answer": "I don't have enough information about this in my knowledge base. Try asking about
|
| 37 |
"retrieved_chunks": [],
|
| 38 |
"reranked_chunks": [],
|
| 39 |
}
|
|
|
|
| 30 |
# Reranker is async — must be awaited.
|
| 31 |
reranked = await reranker.rerank(state["query"], unique_chunks, top_k=5)
|
| 32 |
|
| 33 |
+
# No chunks at all: collection is empty or query is too niche.
|
| 34 |
+
# Return empty so generate node returns its fallback cleanly.
|
| 35 |
+
if not reranked:
|
| 36 |
return {
|
| 37 |
+
"answer": "I don't have enough information about this in my knowledge base. Try asking about Darshan's specific projects or blog posts.",
|
| 38 |
"retrieved_chunks": [],
|
| 39 |
"reranked_chunks": [],
|
| 40 |
}
|