# ConstitutionAgent/services/context_organization.py
# Hosting-page residue from extraction (not part of the program):
#   Meshyboi's picture
#   Upload 53 files
#   0cd3dc5 verified
from groq import Groq
from utils.config import settings
import json
class ContextOrganizationService:
    """Organize retrieved chunks and graph edges into a context payload.

    ``organize_context`` is fully deterministic (no LLM call). The Groq client
    created in ``__init__`` is only used by ``_get_json_response``.
    """

    # Maximum number of characters of chunk text kept per organized chunk.
    MAX_CHUNK_CHARS = 2000

    # Edge ``relationship`` / ``change_type`` values treated as a repeal.
    REPEAL_RELATIONSHIPS = ("DELETES_CLAUSE", "REPEALS", "DELETES")

    def __init__(self):
        """Create the Groq client from ``settings``.

        Raises:
            ValueError: If ``settings.GROQ_API_KEY`` is empty or unset.
        """
        if not settings.GROQ_API_KEY:
            raise ValueError("GROQ_API_KEY is not set.")

        self.provider = "groq"
        self.client = Groq(api_key=settings.GROQ_API_KEY)
        self.model_name = settings.GROQ_MODEL
        print(f"ContextOrganizationService initialized with Groq model: {self.model_name}")

    def _get_json_response(
        self,
        prompt: str,
        *,
        max_attempts: int = 3,
        retry_delay: float = 2.0,
    ) -> dict:
        """Ask the model for a JSON answer and return it parsed.

        Each attempt first uses strict JSON mode. If that call fails with an
        error whose text contains ``"400"`` or ``"json_validate_failed"``, the
        same attempt retries in plain text mode and strips a Markdown code
        fence from the reply before parsing.

        Args:
            prompt: User prompt sent to the model.
            max_attempts: How many attempts to make before giving up.
            retry_delay: Seconds to sleep between failed attempts.

        Returns:
            The parsed JSON reply, or ``{}`` when every attempt failed.
        """
        import re
        import time

        system_message = {
            "role": "system",
            "content": "You are a helpful assistant that outputs JSON.",
        }

        for attempt in range(1, max_attempts + 1):
            try:
                # Primary strategy: strict JSON mode.
                try:
                    completion = self.client.chat.completions.create(
                        messages=[
                            system_message,
                            {"role": "user", "content": prompt},
                        ],
                        model=self.model_name,
                        temperature=0,
                        response_format={"type": "json_object"},
                    )
                    content = completion.choices[0].message.content
                    return json.loads(content.strip())
                except Exception as api_err:
                    error_text = str(api_err)
                    if "400" not in error_text and "json_validate_failed" not in error_text:
                        # Not a JSON-mode validation problem: let the outer
                        # handler log it and retry (bare raise keeps traceback).
                        raise

                    # Fallback strategy: text mode, then parse manually.
                    print(f"JSON Mode failed. Falling back to Text Mode (Attempt {attempt})...")
                    completion = self.client.chat.completions.create(
                        messages=[
                            system_message,
                            {
                                "role": "user",
                                "content": prompt + "\n\nOUTPUT RAW JSON ONLY. NO MARKDOWN.",
                            },
                        ],
                        model=self.model_name,
                        temperature=0,
                    )
                    content = completion.choices[0].message.content
                    # Strip a Markdown code fence if the model added one anyway.
                    fenced = re.search(r"```(?:json)?(.*?)```", content, re.DOTALL)
                    if fenced:
                        content = fenced.group(1)
                    return json.loads(content.strip())
            except Exception as e:
                print(f"Error in LLM request (Attempt {attempt}): {e}")
                if attempt < max_attempts:
                    time.sleep(retry_delay)

        # Best-effort contract: callers get an empty dict instead of an error.
        return {}

    def organize_context(self, query: str, chunks: list, graph_data: list = None) -> dict:
        """Sort, label and trim chunks into a deterministic context payload.

        Each chunk is read as a dict with a ``"text"`` entry and a
        ``"metadata"`` dict (``article_number``, ``year``,
        ``amendment_number``). Each graph edge is read as a dict with
        ``relationship`` or ``change_type``, ``target_id`` or
        ``related_article``, and ``amendment``.

        NOTE: ``source_file`` is removed from every chunk's metadata dict in
        place, so the caller's chunk objects are modified.

        Args:
            query: Currently unused by this method.
            chunks: Retrieved chunks to organize.
            graph_data: Optional list of graph edges; returned unchanged.

        Returns:
            A dict with ``organized_chunks`` (list of ``{"source", "content"}``
            ordered by article number, then year), ``graph_data`` and
            ``meta_info``.
        """
        import re

        chunks = chunks or []
        edges = graph_data or []

        # 1. Latest year seen per article, used for the [CURRENT ACTIVE LAW]
        #    flag. Keys are normalized to str because step 4 looks articles up
        #    by their string form; raw int keys would never match.
        latest_year_per_article = {}
        for chunk in chunks:
            meta = chunk.get("metadata") or {}
            article_number = meta.get("article_number")
            if not article_number:
                continue
            article = str(article_number)
            year = meta.get("year", 0) or 0
            known_year = latest_year_per_article.get(article)
            if known_year is None or year > known_year:
                latest_year_per_article[article] = year

        # 2. Articles that the graph marks as repealed or deleted.
        repealed_articles = {}
        for edge in edges:
            relationship = edge.get("relationship") or edge.get("change_type") or ""
            if relationship not in self.REPEAL_RELATIONSHIPS:
                continue
            # Fall back before converting to str: str(None) is the truthy
            # string "None" and would otherwise hide ``related_article``.
            target = edge.get("target_id")
            if target is None or target == "":
                target = edge.get("related_article")
            if target is None or target == "":
                continue
            repealed_articles[str(target)] = edge.get("amendment", "Unknown")

        # 3. Sort by article, then year. Digit runs compare numerically so
        #    that article "2" precedes article "10".
        digit_runs = re.compile(r"(\d+)")

        def sort_key(chunk):
            meta = chunk.get("metadata") or {}
            article = str(meta.get("article_number", "0"))
            # Splitting on a capture group alternates text (even index) and
            # digits (odd index), so tuple positions always compare like types.
            natural = tuple(
                int(part) if index % 2 else part
                for index, part in enumerate(digit_runs.split(article))
            )
            return (natural, meta.get("year", 0) or 0)

        sorted_chunks = sorted(chunks, key=sort_key)

        # 4. Build the labelled, length-capped entries.
        formatted_chunks = []
        for chunk in sorted_chunks:
            meta = chunk.get("metadata") or {}
            article = str(meta.get("article_number", "?"))
            year = meta.get("year", 0) or 0

            # Drop metadata that is useless downstream (mutates caller's dict).
            meta.pop("source_file", None)

            status_flag = ""
            is_repealed = False
            for target_id, amendment in repealed_articles.items():
                # Match the article itself or a clause of it, e.g. "19(1)".
                if (
                    article == target_id
                    or article.startswith(target_id + "(")
                    or target_id.startswith(article + "(")
                ):
                    status_flag = f" [REPEALED/DELETED BY AMD {amendment}]"
                    is_repealed = True
                    break

            # Only a non-repealed chunk can be the current version.
            if (
                not is_repealed
                and article in latest_year_per_article
                and year == latest_year_per_article[article]
            ):
                status_flag = " [CURRENT ACTIVE LAW]"

            ref = (
                f"Article {article} "
                f"(Amd {meta.get('amendment_number', 'Original')}, Year {year}){status_flag}"
            )
            formatted_chunks.append({
                "source": ref,
                "content": (chunk.get("text") or "")[: self.MAX_CHUNK_CHARS],
            })

        # 5. Graph data is passed through as received (empty list if absent).
        return {
            "organized_chunks": formatted_chunks,
            "graph_data": edges,
            "meta_info": {
                "total_chunks": len(chunks),
                "strategy": "Deterministic Chronological",
            },
        }