omarkashif committed on
Commit
408354f
·
verified ·
1 Parent(s): e4da880

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +33 -7
src/streamlit_app.py CHANGED
@@ -58,24 +58,50 @@ def retrieve_documents(query, top_k=10):
58
 
59
  def generate_response(user_query, docs):
60
  context = "\n\n---\n\n".join(d['metadata']['text'] for d in docs)
61
- sources = sorted({d['metadata']['chunk_id'] for d in docs if 'source' in d['metadata']})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  messages = [
63
  {"role": "system", "content":
64
  "You are a helpful legal assistant. Use the provided context from the documents to answer the user's question. "
65
  "At the end of your answer, write a single line starting with 'Source: ' and list the sources of the documents you used. "
66
- "If multiple sources are used, separate them with commas. The user should be able to clearly understand where the information came from."}
 
 
 
67
  ]
 
 
68
  messages.extend(st.session_state.history)
 
69
  messages.append({"role": "user", "content": f"Context:\n{context}\n\n"
70
- f"Sources:\n{', '.join(sources)}\n\n"
71
  f"Question:\n{user_query}"})
72
-
73
  try:
74
  resp = client.chat.completions.create(
75
  model="gpt-4o-mini",
76
  messages=messages,
77
  temperature=0.2,
78
- max_tokens=750
79
  )
80
  reply = resp.choices[0].message.content.strip()
81
  except Exception as e:
@@ -83,8 +109,8 @@ def generate_response(user_query, docs):
83
  reply = "Sorry, I encountered an error generating the answer."
84
 
85
  # Optional: force clean source line if LLM misses it
86
- if sources:
87
- clean_sources = ", ".join(sources)
88
  if "Source:" not in reply:
89
  reply += f"\n\nSource: {clean_sources}"
90
 
 
58
 
59
  def generate_response(user_query, docs):
60
  context = "\n\n---\n\n".join(d['metadata']['text'] for d in docs)
61
+ # sources = sorted({d['metadata']['chunk_id'] for d in docs if 'source' in d['metadata']})
62
+ readable_sources = []
63
+ for d in docs:
64
+ meta = d['metadata']
65
+ src = meta.get("source", "unknown")
66
+ cid = meta.get("chunk_id", "N/A")
67
+
68
+ if src.lower() in ["constitution", "fbr_ordinance", "ordinance"]:
69
+ # For constitution and ordinances, chunk_id is enough
70
+ readable_sources.append(f"{src.title()} (Chunk {cid})")
71
+
72
+ elif src.lower() in ["case_law", "case", "tax_case"]:
73
+ # For case law, add first ~30 words of text
74
+ text_preview = " ".join(meta.get("text", "").split()[:30])
75
+ readable_sources.append(f"Case Law (Chunk {cid}): {text_preview}...")
76
+
77
+ else:
78
+ readable_sources.append(f"{src.title()} (Chunk {cid})")
79
+
80
+ # Deduplicate sources
81
+ readable_sources = sorted(set(readable_sources))
82
+
83
  messages = [
84
  {"role": "system", "content":
85
  "You are a helpful legal assistant. Use the provided context from the documents to answer the user's question. "
86
  "At the end of your answer, write a single line starting with 'Source: ' and list the sources of the documents you used. "
87
+ "Sources should be written in a human-friendly way using metadata:\n"
88
+ "- Constitution / Ordinances → just mention source name and chunk ID.\n"
89
+ "- Case law → mention source name, chunk ID, and first ~30 words of the text as a preview.\n"
90
+ "Do not invent sources. If multiple are used, separate them with commas."}
91
  ]
92
+
93
+
94
  messages.extend(st.session_state.history)
95
+
96
  messages.append({"role": "user", "content": f"Context:\n{context}\n\n"
97
+ f"Sources:\n{', '.join(readable_sources)}\n\n"
98
  f"Question:\n{user_query}"})
 
99
  try:
100
  resp = client.chat.completions.create(
101
  model="gpt-4o-mini",
102
  messages=messages,
103
  temperature=0.2,
104
+ max_tokens=900
105
  )
106
  reply = resp.choices[0].message.content.strip()
107
  except Exception as e:
 
109
  reply = "Sorry, I encountered an error generating the answer."
110
 
111
  # Optional: force clean source line if LLM misses it
112
+ if readable_sources:
113
+ clean_sources = ", ".join(readable_sources)
114
  if "Source:" not in reply:
115
  reply += f"\n\nSource: {clean_sources}"
116