igerasimov committed on
Commit
6c5dad7
·
1 Parent(s): 119e15a

feat: add topic prepending and expose invalid keywords output

Browse files
Files changed (1) hide show
  1. app.py +14 -4
app.py CHANGED
@@ -123,7 +123,8 @@ def classify_individual_topic(topic_name: str):
123
  valid_set.add(path)
124
 
125
  # Filter generated terms deterministically
126
- valid_kws = [kw for kw in raw_keywords if kw in valid_set]
 
127
  invalid_kws = [kw for kw in raw_keywords if kw not in valid_set]
128
  return {"predicted_keywords": valid_kws, "invalid_keywords": invalid_kws}
129
 
@@ -161,20 +162,28 @@ app = workflow.compile()
161
  # ==========================================================
162
  async def run_agent_classifier(title, abstract):
163
  if not title or not abstract:
164
- return "Please fill out both Title and Abstract fields.", "N/A"
165
 
166
  inputs = {"title": title, "abstract": abstract}
167
  output = await app.ainvoke(inputs)
168
 
169
  topics_str = ", ".join(output.get("chosen_topics", []))
170
  keywords_list = output.get("predicted_keywords", [])
 
171
 
 
172
  if not keywords_list:
173
  keywords_str = "No explicit keywords mapped."
174
  else:
175
  keywords_str = "\n".join([f"• {kw}" for kw in keywords_list])
176
 
177
- return topics_str, keywords_str
 
 
 
 
 
 
178
 
179
  demo = gr.Interface(
180
  fn=run_agent_classifier,
@@ -184,7 +193,8 @@ demo = gr.Interface(
184
  ],
185
  outputs=[
186
  gr.Textbox(label="Routed Multi-Topic Domains"),
187
- gr.Textbox(label="Verified GCMD Keywords Extracted")
 
188
  ],
189
  title="GCMD Science Keyword Classifier Agent",
190
  description="Proof of Concept using LangGraph and LangChain. Routes articles concurrently across science domains and runs isolated self-validation routines.",
 
123
  valid_set.add(path)
124
 
125
  # Filter generated terms deterministically
126
+ # Prepend the topic string to the final outputs so they present nicely
127
+ valid_kws = [f"{topic_name} > {kw}" for kw in raw_keywords if kw in valid_set]
128
  invalid_kws = [kw for kw in raw_keywords if kw not in valid_set]
129
  return {"predicted_keywords": valid_kws, "invalid_keywords": invalid_kws}
130
 
 
162
  # ==========================================================
163
  async def run_agent_classifier(title, abstract):
164
  if not title or not abstract:
165
+ return "Please fill out both Title and Abstract fields.", "N/A", "N/A"
166
 
167
  inputs = {"title": title, "abstract": abstract}
168
  output = await app.ainvoke(inputs)
169
 
170
  topics_str = ", ".join(output.get("chosen_topics", []))
171
  keywords_list = output.get("predicted_keywords", [])
172
+ invalid_list = output.get("invalid_keywords", [])
173
 
174
+ # Format valid output display strings
175
  if not keywords_list:
176
  keywords_str = "No explicit keywords mapped."
177
  else:
178
  keywords_str = "\n".join([f"• {kw}" for kw in keywords_list])
179
 
180
+ # Format invalid output display strings
181
+ if not invalid_list:
182
+ invalid_str = "None! The agent validation loop passed with 100% data integrity."
183
+ else:
184
+ invalid_str = "\n".join([f"⚠ Caught & Removed: {ikw}" for ikw in invalid_list])
185
+
186
+ return topics_str, keywords_str, invalid_str
187
 
188
  demo = gr.Interface(
189
  fn=run_agent_classifier,
 
193
  ],
194
  outputs=[
195
  gr.Textbox(label="Routed Multi-Topic Domains"),
196
+ gr.Textbox(label="Verified GCMD Keywords Extracted (with Topics)"),
197
+ gr.Textbox(label="Hallucinated/Invalid Keywords Caught and Removed")
198
  ],
199
  title="GCMD Science Keyword Classifier Agent",
200
  description="Proof of Concept using LangGraph and LangChain. Routes articles concurrently across science domains and runs isolated self-validation routines.",