Spaces:
Running
Running
Update src/streamlit_app.py
Browse files
- src/streamlit_app.py +33 -7
src/streamlit_app.py
CHANGED
|
@@ -58,24 +58,50 @@ def retrieve_documents(query, top_k=10):
|
|
| 58 |
|
| 59 |
def generate_response(user_query, docs):
|
| 60 |
context = "\n\n---\n\n".join(d['metadata']['text'] for d in docs)
|
| 61 |
-
sources = sorted({d['metadata']['chunk_id'] for d in docs if 'source' in d['metadata']})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
messages = [
|
| 63 |
{"role": "system", "content":
|
| 64 |
"You are a helpful legal assistant. Use the provided context from the documents to answer the user's question. "
|
| 65 |
"At the end of your answer, write a single line starting with 'Source: ' and list the sources of the documents you used. "
|
| 66 |
-
"
|
|
|
|
|
|
|
|
|
|
| 67 |
]
|
|
|
|
|
|
|
| 68 |
messages.extend(st.session_state.history)
|
|
|
|
| 69 |
messages.append({"role": "user", "content": f"Context:\n{context}\n\n"
|
| 70 |
-
f"Sources:\n{', '.join(
|
| 71 |
f"Question:\n{user_query}"})
|
| 72 |
-
|
| 73 |
try:
|
| 74 |
resp = client.chat.completions.create(
|
| 75 |
model="gpt-4o-mini",
|
| 76 |
messages=messages,
|
| 77 |
temperature=0.2,
|
| 78 |
-
max_tokens=
|
| 79 |
)
|
| 80 |
reply = resp.choices[0].message.content.strip()
|
| 81 |
except Exception as e:
|
|
@@ -83,8 +109,8 @@ def generate_response(user_query, docs):
|
|
| 83 |
reply = "Sorry, I encountered an error generating the answer."
|
| 84 |
|
| 85 |
# Optional: force clean source line if LLM misses it
|
| 86 |
-
if
|
| 87 |
-
clean_sources = ", ".join(
|
| 88 |
if "Source:" not in reply:
|
| 89 |
reply += f"\n\nSource: {clean_sources}"
|
| 90 |
|
|
|
|
| 58 |
|
| 59 |
def generate_response(user_query, docs):
|
| 60 |
context = "\n\n---\n\n".join(d['metadata']['text'] for d in docs)
|
| 61 |
+
# sources = sorted({d['metadata']['chunk_id'] for d in docs if 'source' in d['metadata']})
|
| 62 |
+
readable_sources = []
|
| 63 |
+
for d in docs:
|
| 64 |
+
meta = d['metadata']
|
| 65 |
+
src = meta.get("source", "unknown")
|
| 66 |
+
cid = meta.get("chunk_id", "N/A")
|
| 67 |
+
|
| 68 |
+
if src.lower() in ["constitution", "fbr_ordinance", "ordinance"]:
|
| 69 |
+
# For constitution and ordinances, chunk_id is enough
|
| 70 |
+
readable_sources.append(f"{src.title()} (Chunk {cid})")
|
| 71 |
+
|
| 72 |
+
elif src.lower() in ["case_law", "case", "tax_case"]:
|
| 73 |
+
# For case law, add first ~30 words of text
|
| 74 |
+
text_preview = " ".join(meta.get("text", "").split()[:30])
|
| 75 |
+
readable_sources.append(f"Case Law (Chunk {cid}): {text_preview}...")
|
| 76 |
+
|
| 77 |
+
else:
|
| 78 |
+
readable_sources.append(f"{src.title()} (Chunk {cid})")
|
| 79 |
+
|
| 80 |
+
# Deduplicate sources
|
| 81 |
+
readable_sources = sorted(set(readable_sources))
|
| 82 |
+
|
| 83 |
messages = [
|
| 84 |
{"role": "system", "content":
|
| 85 |
"You are a helpful legal assistant. Use the provided context from the documents to answer the user's question. "
|
| 86 |
"At the end of your answer, write a single line starting with 'Source: ' and list the sources of the documents you used. "
|
| 87 |
+
"Sources should be written in a human-friendly way using metadata:\n"
|
| 88 |
+
"- Constitution / Ordinances → just mention source name and chunk ID.\n"
|
| 89 |
+
"- Case law → mention source name, chunk ID, and first ~30 words of the text as a preview.\n"
|
| 90 |
+
"Do not invent sources. If multiple are used, separate them with commas."}
|
| 91 |
]
|
| 92 |
+
|
| 93 |
+
|
| 94 |
messages.extend(st.session_state.history)
|
| 95 |
+
|
| 96 |
messages.append({"role": "user", "content": f"Context:\n{context}\n\n"
|
| 97 |
+
f"Sources:\n{', '.join(readable_sources)}\n\n"
|
| 98 |
f"Question:\n{user_query}"})
|
|
|
|
| 99 |
try:
|
| 100 |
resp = client.chat.completions.create(
|
| 101 |
model="gpt-4o-mini",
|
| 102 |
messages=messages,
|
| 103 |
temperature=0.2,
|
| 104 |
+
max_tokens=900
|
| 105 |
)
|
| 106 |
reply = resp.choices[0].message.content.strip()
|
| 107 |
except Exception as e:
|
|
|
|
| 109 |
reply = "Sorry, I encountered an error generating the answer."
|
| 110 |
|
| 111 |
# Optional: force clean source line if LLM misses it
|
| 112 |
+
if readable_sources:
|
| 113 |
+
clean_sources = ", ".join(readable_sources)
|
| 114 |
if "Source:" not in reply:
|
| 115 |
reply += f"\n\nSource: {clean_sources}"
|
| 116 |
|