omarkashif committed on
Commit
f6d9e3b
·
verified ·
1 Parent(s): b832b0a

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +32 -17
src/streamlit_app.py CHANGED
@@ -18,6 +18,7 @@ pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
18
  index = pc.Index("legal-ai")
19
  model = SentenceTransformer('all-mpnet-base-v2')
20
  chat_history = deque(maxlen=10) # last 5 pairs = 10 messages
 
21
 
22
  st.title("AI Legal Assistant ⚖️")
23
 
@@ -36,7 +37,7 @@ def get_rewritten_query(user_query):
36
  ]
37
  try:
38
  resp = client.chat.completions.create(
39
- model="gpt-4.1-mini",
40
  messages=messages,
41
  temperature=0.1,
42
  max_tokens=400
@@ -70,10 +71,11 @@ def clean_chunk_id(cid: str) -> str:
70
 
71
 
72
  def generate_response(user_query, docs):
 
73
  context = "\n\n---\n\n".join(d['metadata']['text'] for d in docs)
74
- # sources = sorted({d['metadata']['chunk_id'] for d in docs if 'source' in d['metadata']})
75
- # --- Build human-friendly sources ---
76
- readable_sources = []
77
  for d in docs:
78
  meta = d['metadata']
79
  src = meta.get("source", "unknown").lower()
@@ -81,20 +83,21 @@ def generate_response(user_query, docs):
81
  text_preview = " ".join(meta.get("text", "").split()[:30])
82
 
83
  if src in ["constitution"]:
84
- readable_sources.append(f"Constitution ({clean_chunk_id(cid)})")
85
 
86
  elif src in ["fbr_ordinance", "ordinance", "tax_ordinance"]:
87
- readable_sources.append(f"Tax Ordinance ({clean_chunk_id(cid)})")
88
 
89
  elif src in ["case_law", "case", "tax_case"]:
90
- # Use first ~30 words of the actual text
91
- readable_sources.append(f"Case Law: {text_preview}...")
92
 
93
  else:
94
- readable_sources.append(f"{src.title()} ({clean_chunk_id(cid)})")
 
 
95
 
96
- # Deduplicate and sort
97
- readable_sources = sorted(set(readable_sources))
98
 
99
  # --- System prompt ---
100
  messages = [
@@ -108,15 +111,14 @@ def generate_response(user_query, docs):
108
  "If multiple are used, separate them with commas."}
109
  ]
110
 
111
-
112
  messages.extend(st.session_state.history)
113
-
114
  messages.append({"role": "user", "content": f"Context:\n{context}\n\n"
115
- f"Sources:\n{', '.join(readable_sources)}\n\n"
116
  f"Question:\n{user_query}"})
117
  try:
118
  resp = client.chat.completions.create(
119
- model="gpt-4.1-mini",
120
  messages=messages,
121
  temperature=0.1,
122
  max_tokens=900
@@ -127,16 +129,29 @@ def generate_response(user_query, docs):
127
  reply = "Sorry, I encountered an error generating the answer."
128
 
129
  # Optional: force clean source line if LLM misses it
130
- if readable_sources:
131
- clean_sources = ", ".join(readable_sources)
132
  if "Source:" not in reply:
133
  reply += f"\n\nSource: {clean_sources}"
134
 
 
135
  st.session_state.history.append({"role": "assistant", "content": reply})
 
 
 
 
 
 
 
 
 
 
 
136
  return reply
137
 
138
 
139
 
 
140
  # Chat UI
141
  with st.form("chat_input", clear_on_submit=True):
142
  user_input = st.text_input("You:", "")
 
18
  index = pc.Index("legal-ai")
19
  model = SentenceTransformer('all-mpnet-base-v2')
20
  chat_history = deque(maxlen=10) # last 5 pairs = 10 messages
21
+ ll_model = 'gpt-4o-mini'
22
 
23
  st.title("AI Legal Assistant ⚖️")
24
 
 
37
  ]
38
  try:
39
  resp = client.chat.completions.create(
40
+ model=ll_model,
41
  messages=messages,
42
  temperature=0.1,
43
  max_tokens=400
 
71
 
72
 
73
  def generate_response(user_query, docs):
74
+ # --- Collect context ---
75
  context = "\n\n---\n\n".join(d['metadata']['text'] for d in docs)
76
+
77
+ # --- Build human-friendly sources + mapping ---
78
+ source_links = {}
79
  for d in docs:
80
  meta = d['metadata']
81
  src = meta.get("source", "unknown").lower()
 
83
  text_preview = " ".join(meta.get("text", "").split()[:30])
84
 
85
  if src in ["constitution"]:
86
+ display_name = f"Constitution ({clean_chunk_id(cid)})"
87
 
88
  elif src in ["fbr_ordinance", "ordinance", "tax_ordinance"]:
89
+ display_name = f"Tax Ordinance ({clean_chunk_id(cid)})"
90
 
91
  elif src in ["case_law", "case", "tax_case"]:
92
+ display_name = f"Case Law: {text_preview}..."
 
93
 
94
  else:
95
+ display_name = f"{src.title()} ({clean_chunk_id(cid)})"
96
+
97
+ source_links[display_name] = meta.get("text", "")
98
 
99
+ # Deduplicate
100
+ source_links = dict(sorted(source_links.items()))
101
 
102
  # --- System prompt ---
103
  messages = [
 
111
  "If multiple are used, separate them with commas."}
112
  ]
113
 
 
114
  messages.extend(st.session_state.history)
115
+
116
  messages.append({"role": "user", "content": f"Context:\n{context}\n\n"
117
+ f"Sources:\n{', '.join(source_links.keys())}\n\n"
118
  f"Question:\n{user_query}"})
119
  try:
120
  resp = client.chat.completions.create(
121
+ model=ll_model,
122
  messages=messages,
123
  temperature=0.1,
124
  max_tokens=900
 
129
  reply = "Sorry, I encountered an error generating the answer."
130
 
131
  # Optional: force clean source line if LLM misses it
132
+ if source_links:
133
+ clean_sources = ", ".join(source_links.keys())
134
  if "Source:" not in reply:
135
  reply += f"\n\nSource: {clean_sources}"
136
 
137
+ # Save reply into history
138
  st.session_state.history.append({"role": "assistant", "content": reply})
139
+
140
+ # --- Render in Streamlit ---
141
+ st.markdown(reply)
142
+
143
+ # Add expandable sources
144
+ if source_links:
145
+ st.write("### Sources")
146
+ for name, text in source_links.items():
147
+ with st.expander(name):
148
+ st.write(text)
149
+
150
  return reply
151
 
152
 
153
 
154
+
155
  # Chat UI
156
  with st.form("chat_input", clear_on_submit=True):
157
  user_input = st.text_input("You:", "")