Spaces:

VAILL
/

legislation-tracker

Running on CPU Upgrade

App Files Files Community

legislation-tracker / scoping_agent.py

ramanna

Deploy: newsletter display polish

3cc39aa 27 days ago

raw

history blame contribute delete

5.36 kB

	#!/usr/bin/env python3
	"""
	Scoping agent for AI legislation research using LangChain Open Deep Research patterns.

	Focus: Phase 1 (Scoping) – User Clarification + Brief Generation.

	Inputs: freeform research question.
	Outputs: structured JSON object for downstream planning/tooling.

	This module avoids web research and is tailored for internal dataset integration later
	(`data/known_bills_visualize.json`).
	"""

	from __future__ import annotations

	import os
	from typing import List, Optional, Literal, Dict, Any

	from pydantic import BaseModel, Field
	from langchain_openai import ChatOpenAI
	from langchain_core.prompts import ChatPromptTemplate
	from langchain_core.output_parsers import JsonOutputParser


	class SubQuestion(BaseModel):
	    # One concrete sub-question of a research brief; instances populate
	    # ResearchScope.sub_questions.
	    #
	    # NOTE: documented with comments rather than a class docstring on purpose.
	    # pydantic copies a model docstring into the generated JSON schema, and that
	    # schema is embedded in the prompt via the parser's format instructions, so
	    # a docstring here would change what the model is shown.

	    # Required fields.
	    id: str = Field(..., description="Stable identifier for the sub-question")
	    text: str = Field(..., description="Concrete, answerable sub-question")

	    # Optional strategy label; restricted to the same closed set of values that
	    # SYSTEM_PROMPT lists for the model.
	    strategy: Optional[
	        Literal[
	            "fact_finding",
	            "timeline_analysis",
	            "comparative_review",
	            "trend_analysis",
	            "dataset_query",
	            "definition_mapping",
	            "stakeholder_mapping",
	            "gap_identification",
	        ]
	    ] = Field(
	        None,
	        description="Suggested research strategy type for this sub-question",
	    )

	    # Optional free-text guidance.
	    notes: Optional[str] = Field(
	        None,
	        description="Brief guidance, constraints, or hints for this sub-question",
	    )


	class ResearchScope(BaseModel):
	    # Structured research brief produced by the scoping phase. This is the
	    # schema handed to JsonOutputParser and the type returned by
	    # scope_research_question().
	    #
	    # NOTE: comments are used instead of a class docstring because pydantic
	    # copies a model docstring into the JSON schema that is sent to the model.
	    # Field order is kept as-is; it determines the order of the schema
	    # properties.

	    # The question and its boundaries.
	    clarified_question: str
	    scope_statement: str
	    assumptions: List[str]
	    exclusions: List[str]

	    # Retrieval anchors and coverage.
	    key_terms: List[str]
	    jurisdictions: List[str]
	    time_horizon: Optional[str] = None

	    # Decomposition of the work.
	    sub_questions: List[SubQuestion]
	    suggested_directions: List[str]

	    # Free-form mapping; defaults to an empty dict when omitted.
	    dataset_alignment: Dict[str, Any] = Field(
	        description="Hints for how to query the internal dataset (fields, filters)",
	        default_factory=dict,
	    )


	# System message for the scoping chain: build_scoping_chain() passes this as
	# the "system" entry of the chat prompt. It currently contains no template
	# placeholders. NOTE(review): because it is fed to
	# ChatPromptTemplate.from_messages, literal curly braces added here would
	# presumably be read as template variables -- escape them if ever needed.
	SYSTEM_PROMPT = """
	You are a senior research methodologist specializing in AI legislation. Your job is to perform
	the Scoping phase of a deep research workflow: clarify the user's question and produce a concise,
	well-structured research brief tailored for legislative analysis using an internal dataset of
	~2,000+ US federal and state AI-related bills.

	Guidelines:
	- Keep scope precise and operational. Avoid generic platitudes.
	- Break the problem into 5–10 concrete sub-questions, each with an optional strategy label
	(fact_finding, timeline_analysis, comparative_review, trend_analysis, dataset_query,
	definition_mapping, stakeholder_mapping, gap_identification).
	- Prefer directions that can be executed via structured queries over the internal dataset later.
	- Include key terms to anchor retrieval (bill fields, concepts, definitions, agencies, topics).
	- Include assumptions and explicit exclusions to bound the work.
	- Include a dataset_alignment section that anticipates filters/fields such as: state, session_year,
	status, iapp_categories, last_action_date, sponsors, title, summary.

	Output MUST be valid JSON only, matching the provided JSON schema.
	"""


	# User message template for the scoping chain (the "user" entry of the chat
	# prompt in build_scoping_chain()). Placeholders:
	#   {user_question}       - supplied per call via chain.invoke(...)
	#   {format_instructions} - pre-bound with .partial() to the output parser's
	#                           format instructions
	USER_TEMPLATE = """
	User research question:
	{user_question}

	Produce a JSON object that matches this pydantic schema:
	{format_instructions}
	"""


	def get_llm(
	    api_key: Optional[str] = None,
	    model: str = "gpt-5-mini",
	    temperature: float = 0.2,
	) -> ChatOpenAI:
	    """Create the ChatOpenAI client used by the scoping chain.

	    Args:
	        api_key: Explicit OpenAI API key. When it is None or blank, the
	            OPENAI_API_KEY environment variable is used instead.
	        model: Model name forwarded to ChatOpenAI.
	        temperature: Sampling temperature forwarded to ChatOpenAI.

	    Returns:
	        A ChatOpenAI instance configured with the resolved key.

	    Raises:
	        RuntimeError: If neither the argument nor the environment variable
	            provides a non-blank key.
	    """
	    # Try the explicit argument first, then the environment. Each candidate is
	    # stripped so that a whitespace-only value (e.g. a secret file containing
	    # just a newline) counts as missing here instead of surfacing later as an
	    # authentication failure at request time.
	    for candidate in (api_key, os.environ.get("OPENAI_API_KEY")):
	        key = (candidate or "").strip()
	        if key:
	            return ChatOpenAI(api_key=key, model=model, temperature=temperature)
	    raise RuntimeError("OPENAI_API_KEY not set. Provide via env or argument.")


	def build_scoping_chain(llm: Optional[ChatOpenAI] = None):
	    """Assemble the prompt -> chat model -> JSON parser pipeline for scoping.

	    Args:
	        llm: Chat model to run the prompt against. When None, a default client
	            is created with get_llm().

	    Returns:
	        The composed chain. Invoke it with ``{"user_question": ...}``; the
	        ``format_instructions`` placeholder of USER_TEMPLATE is already bound.

	    Raises:
	        RuntimeError: Propagated from get_llm() when no ``llm`` is given and no
	            API key is available.
	    """
	    output_parser = JsonOutputParser(pydantic_object=ResearchScope)

	    # Bind the schema instructions once so callers only supply the question.
	    chat_prompt = ChatPromptTemplate.from_messages(
	        [
	            ("system", SYSTEM_PROMPT),
	            ("user", USER_TEMPLATE),
	        ]
	    ).partial(format_instructions=output_parser.get_format_instructions())

	    # Explicit None check: only fall back to the default client when the caller
	    # really passed nothing.
	    model = llm if llm is not None else get_llm()

	    # Plain "|" composition; the scraped source had markdown-escaped pipes
	    # ("\|"), which is not valid Python.
	    return chat_prompt | model | output_parser


	def scope_research_question(user_question: str, *, llm: Optional[ChatOpenAI] = None) -> ResearchScope:
	    """Run the scoping chain on a freeform question and return the brief.

	    Args:
	        user_question: The user's research question, passed to the chain as
	            ``user_question``.
	        llm: Optional chat model forwarded to build_scoping_chain().

	    Returns:
	        The chain output as a ResearchScope. Errors raised by the chain or by
	        ``ResearchScope.model_validate`` are not caught here.
	    """
	    raw = build_scoping_chain(llm).invoke({"user_question": user_question})

	    # The JSON parser may hand back a plain dict rather than a model instance;
	    # only validate/coerce in that case.
	    if not isinstance(raw, ResearchScope):
	        raw = ResearchScope.model_validate(raw)
	    return raw


	def to_json(scope: ResearchScope) -> str:
	    """Serialize a ResearchScope to a JSON string.

	    The model is dumped with ``model_dump()`` and rendered with a 2-space
	    indent; non-ASCII characters are written as-is rather than escaped.
	    """
	    from json import dumps

	    payload = scope.model_dump()
	    return dumps(payload, indent=2, ensure_ascii=False)


	if __name__ == "__main__":
	    # Command-line entry point: scope one question and print the brief as JSON.
	    import argparse

	    cli = argparse.ArgumentParser(description="Run the scoping agent.")
	    cli.add_argument("question", type=str, help="Freeform research question")
	    cli.add_argument(
	        "--api-key",
	        dest="api_key",
	        type=str,
	        default=None,
	        help="OpenAI API key (overrides environment variable)",
	    )
	    cli.add_argument(
	        "--model",
	        dest="model",
	        type=str,
	        default="gpt-5-mini",
	        help="Model name (default: gpt-5-mini)",
	    )
	    args = cli.parse_args()

	    # Always build the client from the CLI options. Previously the client was
	    # only built when --api-key was given, so --model was silently ignored for
	    # users relying on the OPENAI_API_KEY environment variable. get_llm()
	    # itself falls back to that variable when api_key is None.
	    llm = get_llm(api_key=args.api_key, model=args.model)

	    scope = scope_research_question(args.question, llm=llm)
	    # to_json() emits the same 2-space, non-ASCII-preserving JSON that was
	    # previously duplicated inline here.
	    print(to_json(scope))