omarkashif committed on
Commit
408354f
·
verified ·
1 Parent(s): e4da880

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +33 -7
src/streamlit_app.py CHANGED
@@ -58,24 +58,50 @@ def retrieve_documents(query, top_k=10):
58
 
59
  def generate_response(user_query, docs):
60
  context = "\n\n---\n\n".join(d['metadata']['text'] for d in docs)
61
- sources = sorted({d['metadata']['chunk_id'] for d in docs if 'source' in d['metadata']})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  messages = [
63
  {"role": "system", "content":
64
  "You are a helpful legal assistant. Use the provided context from the documents to answer the user's question. "
65
  "At the end of your answer, write a single line starting with 'Source: ' and list the sources of the documents you used. "
66
- "If multiple sources are used, separate them with commas. The user should be able to clearly understand where the information came from."}
 
 
 
67
  ]
 
 
68
  messages.extend(st.session_state.history)
 
69
  messages.append({"role": "user", "content": f"Context:\n{context}\n\n"
70
- f"Sources:\n{', '.join(sources)}\n\n"
71
  f"Question:\n{user_query}"})
72
-
73
  try:
74
  resp = client.chat.completions.create(
75
  model="gpt-4o-mini",
76
  messages=messages,
77
  temperature=0.2,
78
- max_tokens=750
79
  )
80
  reply = resp.choices[0].message.content.strip()
81
  except Exception as e:
@@ -83,8 +109,8 @@ def generate_response(user_query, docs):
83
  reply = "Sorry, I encountered an error generating the answer."
84
 
85
  # Optional: force clean source line if LLM misses it
86
- if sources:
87
- clean_sources = ", ".join(sources)
88
  if "Source:" not in reply:
89
  reply += f"\n\nSource: {clean_sources}"
90
 
 
58
 
59
  def generate_response(user_query, docs):
60
  context = "\n\n---\n\n".join(d['metadata']['text'] for d in docs)
61
+ # sources = sorted({d['metadata']['chunk_id'] for d in docs if 'source' in d['metadata']})
62
+ readable_sources = []
63
+ for d in docs:
64
+ meta = d['metadata']
65
+ src = meta.get("source", "unknown")
66
+ cid = meta.get("chunk_id", "N/A")
67
+
68
+ if src.lower() in ["constitution", "fbr_ordinance", "ordinance"]:
69
+ # For constitution and ordinances, chunk_id is enough
70
+ readable_sources.append(f"{src.title()} (Chunk {cid})")
71
+
72
+ elif src.lower() in ["case_law", "case", "tax_case"]:
73
+ # For case law, add first ~30 words of text
74
+ text_preview = " ".join(meta.get("text", "").split()[:30])
75
+ readable_sources.append(f"Case Law (Chunk {cid}): {text_preview}...")
76
+
77
+ else:
78
+ readable_sources.append(f"{src.title()} (Chunk {cid})")
79
+
80
+ # Deduplicate sources
81
+ readable_sources = sorted(set(readable_sources))
82
+
83
  messages = [
84
  {"role": "system", "content":
85
  "You are a helpful legal assistant. Use the provided context from the documents to answer the user's question. "
86
  "At the end of your answer, write a single line starting with 'Source: ' and list the sources of the documents you used. "
87
+ "Sources should be written in a human-friendly way using metadata:\n"
88
+ "- Constitution / Ordinances → just mention source name and chunk ID.\n"
89
+ "- Case law → mention source name, chunk ID, and first ~30 words of the text as a preview.\n"
90
+ "Do not invent sources. If multiple are used, separate them with commas."}
91
  ]
92
+
93
+
94
  messages.extend(st.session_state.history)
95
+
96
  messages.append({"role": "user", "content": f"Context:\n{context}\n\n"
97
+ f"Sources:\n{', '.join(readable_sources)}\n\n"
98
  f"Question:\n{user_query}"})
 
99
  try:
100
  resp = client.chat.completions.create(
101
  model="gpt-4o-mini",
102
  messages=messages,
103
  temperature=0.2,
104
+ max_tokens=900
105
  )
106
  reply = resp.choices[0].message.content.strip()
107
  except Exception as e:
 
109
  reply = "Sorry, I encountered an error generating the answer."
110
 
111
  # Optional: force clean source line if LLM misses it
112
+ if readable_sources:
113
+ clean_sources = ", ".join(readable_sources)
114
  if "Source:" not in reply:
115
  reply += f"\n\nSource: {clean_sources}"
116