Spaces:

Meshyboi
/

ConstitutionAgent

Sleeping

App Files Files Community

ConstitutionAgent / services /context_organization.py

Meshyboi

Upload 53 files

0cd3dc5 verified about 1 month ago

raw

history blame contribute delete

6.22 kB

	from groq import Groq
	from utils.config import settings
	import json

	class ContextOrganizationService:
	    """Turn retrieved chunks and graph edges into an ordered, annotated context.

	    ``organize_context`` is fully deterministic and makes no LLM call.
	    ``_get_json_response`` is a helper that asks the configured Groq model for
	    a JSON reply, retrying on failure.
	    """

	    # Relationship / change-type labels that mark an article as removed.
	    # Kept as a tuple so membership tests also tolerate unhashable values.
	    _REPEAL_RELATIONSHIPS = ("DELETES_CLAUSE", "REPEALS", "DELETES")
	    # Total number of LLM attempts and the pause between failed attempts.
	    _MAX_ATTEMPTS = 3
	    _RETRY_DELAY_SECONDS = 2
	    # Upper bound on the number of characters of chunk text passed through.
	    _MAX_CHUNK_CHARS = 2000

	    def __init__(self):
	        """Create the Groq client from ``settings``.

	        Raises:
	            ValueError: if ``settings.GROQ_API_KEY`` is empty or unset.
	        """
	        if not settings.GROQ_API_KEY:
	            raise ValueError("GROQ_API_KEY is not set.")

	        self.provider = "groq"
	        self.client = Groq(api_key=settings.GROQ_API_KEY)
	        self.model_name = settings.GROQ_MODEL
	        print(f"ContextOrganizationService initialized with Groq model: {self.model_name}")

	    def _get_json_response(self, prompt: str) -> dict:
	        """Ask the model for JSON and return the parsed result.

	        Each attempt first requests ``response_format={"type": "json_object"}``.
	        If that raises an error whose text contains ``"400"`` or
	        ``"json_validate_failed"``, the same attempt retries as a plain-text
	        completion and strips a surrounding Markdown code fence before parsing.

	        Args:
	            prompt: User message sent to the model.

	        Returns:
	            The value produced by ``json.loads`` on the reply, or ``{}`` when
	            every attempt failed. Errors are printed, never raised.
	        """
	        import re
	        import time

	        system_message = {"role": "system", "content": "You are a helpful assistant that outputs JSON."}

	        for attempt in range(self._MAX_ATTEMPTS):
	            try:
	                # Primary strategy: strict JSON mode.
	                try:
	                    chat_completion = self.client.chat.completions.create(
	                        messages=[
	                            system_message,
	                            {"role": "user", "content": prompt},
	                        ],
	                        model=self.model_name,
	                        temperature=0,
	                        response_format={"type": "json_object"},
	                    )
	                    content = chat_completion.choices[0].message.content
	                    return json.loads(content.strip())
	                except Exception as api_err:
	                    error_text = str(api_err)
	                    # Anything that does not look like a JSON-mode rejection
	                    # is handed to the retry handler below.
	                    if "400" not in error_text and "json_validate_failed" not in error_text:
	                        raise

	                    # Fallback strategy: text mode, then parse manually.
	                    print(f"JSON Mode failed. Falling back to Text Mode (Attempt {attempt+1})...")
	                    chat_completion = self.client.chat.completions.create(
	                        messages=[
	                            system_message,
	                            {"role": "user", "content": prompt + "\n\nOUTPUT RAW JSON ONLY. NO MARKDOWN."},
	                        ],
	                        model=self.model_name,
	                        temperature=0,
	                    )
	                    content = chat_completion.choices[0].message.content

	                    # Strip a Markdown code block if the model added one anyway.
	                    match = re.search(r"```(?:json)?(.*?)```", content, re.DOTALL)
	                    if match:
	                        content = match.group(1)

	                    return json.loads(content.strip())
	            except Exception as e:
	                # Deliberately best-effort: report, pause, and try again.
	                print(f"Error in LLM request (Attempt {attempt+1}): {e}")
	                if attempt < self._MAX_ATTEMPTS - 1:
	                    time.sleep(self._RETRY_DELAY_SECONDS)
	        return {}

	    @staticmethod
	    def _latest_year_per_article(chunks: list) -> dict:
	        """Map each article (as a string) to the largest ``year`` among its chunks."""
	        latest = {}
	        for chunk in chunks:
	            meta = chunk.get("metadata") or {}
	            raw_article = meta.get("article_number")
	            if not raw_article:
	                continue
	            # Key by str so the lookup in _status_flag, which also uses the
	            # stringified article number, matches integer article numbers too.
	            key = str(raw_article)
	            year = meta.get("year", 0) or 0
	            if key not in latest or year > latest[key]:
	                latest[key] = year
	        return latest

	    @staticmethod
	    def _edge_target(edge: dict) -> str:
	        """Return the edge's ``target_id``, else ``related_article``, as a string.

	        ``None`` and empty values are skipped so that a null ``target_id`` does
	        not become the literal string ``"None"``. Returns ``""`` if neither
	        field holds a usable value.
	        """
	        for field in ("target_id", "related_article"):
	            value = edge.get(field)
	            if value is None:
	                continue
	            text = str(value)
	            if text:
	                return text
	        return ""

	    @classmethod
	    def _collect_repealed_articles(cls, graph_data) -> dict:
	        """Map repealed/deleted article ids to the amendment named on the edge."""
	        repealed = {}
	        for edge in graph_data or []:
	            rel = edge.get("relationship", "") or edge.get("change_type", "")
	            if rel not in cls._REPEAL_RELATIONSHIPS:
	                continue
	            target = cls._edge_target(edge)
	            if target:
	                repealed[target] = edge.get("amendment", "Unknown")
	        return repealed

	    @staticmethod
	    def _status_flag(article: str, year, latest_year_per_article: dict, repealed_articles: dict) -> str:
	        """Return the status suffix for one chunk (possibly an empty string).

	        A repeal recorded in the graph wins over the "latest version" flag. An
	        article matches a repealed id when they are equal or when one is the
	        other followed by a parenthesised clause, e.g. ``"31"`` and ``"31(2)"``.
	        """
	        for target_id, amd_num in repealed_articles.items():
	            same_article = article == target_id
	            clause_of_target = article.startswith(target_id + "(")
	            target_is_clause = target_id.startswith(article + "(")
	            if same_article or clause_of_target or target_is_clause:
	                return f" [REPEALED/DELETED BY AMD {amd_num}]"

	        if article in latest_year_per_article and year == latest_year_per_article[article]:
	            return " [CURRENT ACTIVE LAW]"
	        return ""

	    def organize_context(self, query: str, chunks: list, graph_data: list = None) -> dict:
	        """Deterministically order and label chunks for downstream prompting.

	        Args:
	            query: Accepted for interface compatibility; not used here.
	            chunks: Dicts with optional ``"metadata"`` (``article_number``,
	                ``year``, ``amendment_number``, ``source_file``) and ``"text"``.
	                Side effect: ``source_file`` is removed from each chunk's
	                metadata dict in place.
	            graph_data: Optional edge dicts; edges whose ``relationship`` or
	                ``change_type`` is a repeal/delete label mark articles as
	                repealed.

	        Returns:
	            A dict with ``"organized_chunks"`` (list of ``{"source", "content"}``
	            sorted by article string, then year), ``"graph_data"`` (the input
	            edges, or ``[]``) and ``"meta_info"``.
	        """
	        # 1. Latest year per article, used to flag [CURRENT ACTIVE LAW].
	        latest_year_per_article = self._latest_year_per_article(chunks)

	        # 2. Articles the graph reports as repealed or deleted.
	        repealed_articles = self._collect_repealed_articles(graph_data)

	        # 3. Sort by article (compared as strings), then year.
	        def get_sort_key(chunk):
	            meta = chunk.get("metadata") or {}
	            return (str(meta.get("article_number", "0")), meta.get("year", 0) or 0)

	        # 4. Format each chunk as a labelled source plus capped text.
	        formatted_chunks = []
	        for chunk in sorted(chunks, key=get_sort_key):
	            meta = chunk.get("metadata") or {}
	            article = str(meta.get("article_number", "?"))
	            year = meta.get("year", 0) or 0

	            # Drops the field from the caller's metadata dict (in place).
	            meta.pop("source_file", None)

	            status_flag = self._status_flag(article, year, latest_year_per_article, repealed_articles)
	            ref = f"Article {article} (Amd {meta.get('amendment_number', 'Original')}, Year {year}){status_flag}"
	            formatted_chunks.append({
	                "source": ref,
	                # A missing or None text becomes an empty string.
	                "content": (chunk.get("text") or "")[:self._MAX_CHUNK_CHARS],
	            })

	        # 5. Graph data is passed through unchanged; only "no data" is
	        # normalised to an empty list.
	        return {
	            "organized_chunks": formatted_chunks,
	            "graph_data": graph_data or [],
	            "meta_info": {
	                "total_chunks": len(chunks),
	                "strategy": "Deterministic Chronological",
	            },
	        }