decodingdatascience committed on
Commit
cda90c5
·
verified ·
1 Parent(s): ef780a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -16
app.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import os
2
  from pathlib import Path
3
  import requests
@@ -41,10 +45,10 @@ FAQ_ITEMS = [
41
 
42
  LOGO_RAW_URL = "https://raw.githubusercontent.com/Decoding-Data-Science/airesidency/main/dds-logo-removebg-preview.png"
43
 
44
- # PDFs in repo
45
  PDF_DIR = Path("data/pdfs")
46
 
47
- # Persistent disk if enabled on Spaces
48
  PERSIST_ROOT = Path("/data") if Path("/data").exists() else Path(".")
49
  VDB_DIR = PERSIST_ROOT / "chroma"
50
 
@@ -69,6 +73,7 @@ def download_logo() -> str | None:
69
  return None
70
 
71
  def build_or_load_index():
 
72
  if not os.getenv("OPENAI_API_KEY"):
73
  raise RuntimeError("OPENAI_API_KEY is not set. Add it in Space Settings → Repository secrets.")
74
 
@@ -84,6 +89,7 @@ def build_or_load_index():
84
  Settings.llm = LIOpenAI(model=LLM_MODEL, temperature=0.0)
85
  Settings.node_parser = SentenceSplitter(chunk_size=900, chunk_overlap=150)
86
 
 
87
  docs = SimpleDirectoryReader(
88
  input_dir=str(PDF_DIR),
89
  required_exts=[".pdf"],
@@ -94,7 +100,7 @@ def build_or_load_index():
94
  VDB_DIR.mkdir(parents=True, exist_ok=True)
95
  chroma_client = chromadb.PersistentClient(path=str(VDB_DIR))
96
 
97
- # Reuse existing collection if it already has vectors
98
  try:
99
  col = chroma_client.get_collection(COLLECTION_NAME)
100
  try:
@@ -102,7 +108,8 @@ def build_or_load_index():
102
  vector_store = ChromaVectorStore(chroma_collection=col)
103
  storage_context = StorageContext.from_defaults(vector_store=vector_store)
104
  return VectorStoreIndex.from_vector_store(
105
- vector_store=vector_store, storage_context=storage_context
 
106
  )
107
  except Exception:
108
  pass
@@ -135,11 +142,14 @@ def format_sources(resp, max_sources=5) -> str:
135
  lines.append(f"{i}) {doc} | page {page} | score {score:.3f}")
136
  return "\n".join(lines)
137
 
 
 
 
 
138
  # -----------------------------
139
  # Build index + chat engine
140
  # -----------------------------
141
  INDEX = build_or_load_index()
142
-
143
  CHAT_ENGINE = INDEX.as_chat_engine(
144
  chat_mode="context",
145
  similarity_top_k=5,
@@ -147,10 +157,9 @@ CHAT_ENGINE = INDEX.as_chat_engine(
147
  )
148
 
149
  # -----------------------------
150
- # Gradio callbacks (MESSAGES format)
151
- # history is: [{"role":"user","content":"..."}, {"role":"assistant","content":"..."}, ...]
152
  # -----------------------------
153
- def answer(user_msg: str, history: list, show_sources: bool):
154
  user_msg = (user_msg or "").strip()
155
  if not user_msg:
156
  return history, ""
@@ -161,11 +170,18 @@ def answer(user_msg: str, history: list, show_sources: bool):
161
  if show_sources:
162
  text = text + "\n\n" + format_sources(resp)
163
 
164
- # Append messages (this fixes your error)
165
- history = (history or []) + [
166
- {"role": "user", "content": user_msg},
167
- {"role": "assistant", "content": text},
168
- ]
 
 
 
 
 
 
 
169
  return history, ""
170
 
171
  def load_faq(faq_choice: str):
@@ -179,7 +195,7 @@ def clear_chat():
179
  # -----------------------------
180
  logo_path = download_logo()
181
 
182
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
183
  with gr.Row():
184
  if logo_path:
185
  gr.Image(value=logo_path, show_label=False, height=70, width=70, container=False)
@@ -199,8 +215,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
199
  clear_btn = gr.Button("Clear chat")
200
 
201
  with gr.Column(scale=2, min_width=520):
202
- # IMPORTANT: type="messages"
203
- chatbot = gr.Chatbot(label="DDS HR Assistant", height=520, type="messages")
204
  user_input = gr.Textbox(label="Your question", placeholder="Ask a policy question and press Enter")
205
  send_btn = gr.Button("Send")
206
 
 
1
+ # app.py — DDS HR Chatbot (RAG Demo) for Hugging Face Spaces
2
+ # Fixes: Gradio Chatbot history format mismatch WITHOUT using Chatbot(type="messages")
3
+ # Works across Gradio versions by auto-detecting whether Chatbot expects dict-messages or tuple-history.
4
+
5
  import os
6
  from pathlib import Path
7
  import requests
 
45
 
46
  LOGO_RAW_URL = "https://raw.githubusercontent.com/Decoding-Data-Science/airesidency/main/dds-logo-removebg-preview.png"
47
 
48
+ # PDFs live in repo under ./data/pdfs
49
  PDF_DIR = Path("data/pdfs")
50
 
51
+ # Persistent disk if enabled on Spaces (recommended). Otherwise local folder.
52
  PERSIST_ROOT = Path("/data") if Path("/data").exists() else Path(".")
53
  VDB_DIR = PERSIST_ROOT / "chroma"
54
 
 
73
  return None
74
 
75
  def build_or_load_index():
76
+ # Ensure OpenAI key exists (HF Spaces Secrets → OPENAI_API_KEY)
77
  if not os.getenv("OPENAI_API_KEY"):
78
  raise RuntimeError("OPENAI_API_KEY is not set. Add it in Space Settings → Repository secrets.")
79
 
 
89
  Settings.llm = LIOpenAI(model=LLM_MODEL, temperature=0.0)
90
  Settings.node_parser = SentenceSplitter(chunk_size=900, chunk_overlap=150)
91
 
92
+ # Read docs
93
  docs = SimpleDirectoryReader(
94
  input_dir=str(PDF_DIR),
95
  required_exts=[".pdf"],
 
100
  VDB_DIR.mkdir(parents=True, exist_ok=True)
101
  chroma_client = chromadb.PersistentClient(path=str(VDB_DIR))
102
 
103
+ # Reuse existing collection if it has vectors
104
  try:
105
  col = chroma_client.get_collection(COLLECTION_NAME)
106
  try:
 
108
  vector_store = ChromaVectorStore(chroma_collection=col)
109
  storage_context = StorageContext.from_defaults(vector_store=vector_store)
110
  return VectorStoreIndex.from_vector_store(
111
+ vector_store=vector_store,
112
+ storage_context=storage_context,
113
  )
114
  except Exception:
115
  pass
 
142
  lines.append(f"{i}) {doc} | page {page} | score {score:.3f}")
143
  return "\n".join(lines)
144
 
145
+ def _is_messages_history(history):
146
+ # messages history = list[{"role":..., "content":...}, ...]
147
+ return isinstance(history, list) and (len(history) == 0 or isinstance(history[0], dict))
148
+
149
  # -----------------------------
150
  # Build index + chat engine
151
  # -----------------------------
152
  INDEX = build_or_load_index()
 
153
  CHAT_ENGINE = INDEX.as_chat_engine(
154
  chat_mode="context",
155
  similarity_top_k=5,
 
157
  )
158
 
159
  # -----------------------------
160
+ # Gradio callbacks (version-compatible)
 
161
  # -----------------------------
162
+ def answer(user_msg: str, history, show_sources: bool):
163
  user_msg = (user_msg or "").strip()
164
  if not user_msg:
165
  return history, ""
 
170
  if show_sources:
171
  text = text + "\n\n" + format_sources(resp)
172
 
173
+ history = history or []
174
+
175
+ # If this Gradio Chatbot expects "messages" format
176
+ if _is_messages_history(history):
177
+ history = history + [
178
+ {"role": "user", "content": user_msg},
179
+ {"role": "assistant", "content": text},
180
+ ]
181
+ return history, ""
182
+
183
+ # Else assume legacy tuple format: [(user, bot), ...]
184
+ history = history + [(user_msg, text)]
185
  return history, ""
186
 
187
  def load_faq(faq_choice: str):
 
195
  # -----------------------------
196
  logo_path = download_logo()
197
 
198
+ with gr.Blocks() as demo:
199
  with gr.Row():
200
  if logo_path:
201
  gr.Image(value=logo_path, show_label=False, height=70, width=70, container=False)
 
215
  clear_btn = gr.Button("Clear chat")
216
 
217
  with gr.Column(scale=2, min_width=520):
218
+ # NOTE: no 'type' kwarg to avoid version errors
219
+ chatbot = gr.Chatbot(label="DDS HR Assistant", height=520)
220
  user_input = gr.Textbox(label="Your question", placeholder="Ask a policy question and press Enter")
221
  send_btn = gr.Button("Send")
222