Spaces:

Meshyboi
/

ConstitutionAgent

Running

App Files Files Community

ConstitutionAgent / data_tools /enrich_data.py

Meshyboi

Upload 53 files

0cd3dc5 verified 24 days ago

raw

history blame contribute delete

4.04 kB

	import re
	import os
	import json
	import glob
	import time
	import random
	import sys

	# Ensure project root is in sys.path: the root is taken to be the directory
	# two levels above this file, so the `utils` import below can be resolved.
	_this_file = os.path.abspath(__file__)
	_project_root = os.path.dirname(os.path.dirname(_this_file))
	sys.path.append(_project_root)

	from groq import Groq
	from utils.config import settings

	# Initialize Groq. Abort at import time when no API key is configured,
	# because the client built below is constructed from that key.
	_groq_api_key = settings.GROQ_API_KEY
	if not _groq_api_key:
	    raise ValueError("GROQ_API_KEY is not set.")

	client = Groq(api_key=_groq_api_key)
	# Use the configured model, or a default when GROQ_MODEL is empty/unset.
	model = settings.GROQ_MODEL or "llama-3.3-70b-versatile"

	def _build_enrichment_prompt(title, original_desc):
	    """Return the user prompt asking the LLM to expand one amendment summary."""
	    return f"""
	You are a Constitutional Legal Expert. Your task is to EXPAND the summary of a Constitutional Amendment to include specific details about KEY ARTICLES that were added, deleted, or modified.

	Amendment: {title}
	Original Summary: {original_desc}

	Your Goal:
	1. Identify the major articles mentioned (e.g., Article 19, 31, 368, 42nd Amendment changes).
	2. Explicitly state WHAT changed for these articles. Did it delete a Right? Did it add a Duty? Did it change 'internal disturbance' to 'armed rebellion'?
	3. Be precise with Article numbers and Clauses (e.g. 19(1)(f)).
	4. Identify Cross-Article Effects: Does the amendment modify an Article that impacts another Article? (e.g. "Article 358 restricts Article 19"). Explicitly state if one article suspends or overrides another.
	5. Do NOT hallucinate. Use your knowledge of the Indian Constitution to fill in the semantic details implied by the original summary.

	Output ONLY the Enhanced Summary text. Do not add conversational filler.
	"""


	def _request_chat_completion(prompt, file_path, max_attempts=3):
	    """Send the prompt to the chat model, retrying only on rate-limit errors.

	    Returns the completion object on success, or ``None`` when a non
	    rate-limit error occurs or every attempt was rate limited. A message
	    describing the failure is printed in both cases.
	    """
	    for attempt in range(max_attempts):
	        try:
	            return client.chat.completions.create(
	                messages=[
	                    {"role": "system", "content": "You are a helpful assistant that outputs detailed legal summaries."},
	                    {"role": "user", "content": prompt},
	                ],
	                model=model,
	            )
	        except Exception as e:
	            error_text = str(e)
	            if "429" not in error_text and "rate_limit" not in error_text:
	                # other errors, maybe skip
	                print(f"Error calling LLM: {e}")
	                return None
	            # Back off linearly (5s, 10s, ...), but do not sleep after the
	            # final attempt: nothing is retried afterwards.
	            if attempt < max_attempts - 1:
	                wait = (attempt + 1) * 5
	                print(f"Rate Limit hit. Waiting {wait}s...")
	                time.sleep(wait)

	    print(f"Failed to enrich {file_path} after {max_attempts} retries.")
	    return None


	def enrich_amendment_summary(file_path):
	    """Replace the ``content`` of one summary JSON with an LLM-expanded summary.

	    Reads the JSON document at ``file_path``, builds a prompt from
	    ``metadata.amendment_title`` and ``metadata.amendment_description``,
	    asks the chat model for an enhanced summary, strips any
	    ``<think>``/``<thought>`` sections from the reply, then stores the
	    result under ``content``, sets ``metadata.enriched`` to ``True`` and
	    writes the document back to the same path.

	    The function is best-effort: every failure is reported with ``print``
	    and the function returns ``None`` without raising. The file is left
	    untouched when the LLM call fails or yields no usable text.

	    Args:
	        file_path: Path of the ``summary.json`` file to enrich in place.
	    """
	    print(f"Processing {file_path}...")
	    try:
	        # Explicit encoding so reads do not depend on the platform locale.
	        with open(file_path, "r", encoding="utf-8") as f:
	            data = json.load(f)

	        original_desc = data["metadata"].get("amendment_description", "")
	        title = data["metadata"].get("amendment_title", "")

	        prompt = _build_enrichment_prompt(title, original_desc)
	        chat_completion = _request_chat_completion(prompt, file_path)
	        if chat_completion is None:
	            return

	        # The message content may be None; treat that like an empty reply.
	        enhanced_summary = (chat_completion.choices[0].message.content or "").strip()

	        # Strip thought process if present (e.g. <think>...</think>)
	        enhanced_summary = re.sub(r'<think>.*?</think>', '', enhanced_summary, flags=re.DOTALL).strip()
	        enhanced_summary = re.sub(r'<thought>.*?</thought>', '', enhanced_summary, flags=re.DOTALL).strip()

	        # Never overwrite existing content with an empty string.
	        if not enhanced_summary:
	            print(f"Empty response for {file_path}; leaving file unchanged.")
	            return

	        # Update the JSON
	        data["content"] = enhanced_summary
	        data["metadata"]["enriched"] = True

	        # Serialize before opening for write: opening with "w" truncates the
	        # file, so a serialization error must surface while it is intact.
	        serialized = json.dumps(data, indent=2)
	        with open(file_path, "w", encoding="utf-8") as f:
	            f.write(serialized)

	        print(f"Enriched {title}")

	    except Exception as e:
	        # Per-file boundary: report and continue so one bad file does not
	        # abort a whole batch run.
	        print(f"Error processing {file_path}: {e}")

	def main():
	    """Enrich every amendment summary file, one after another.

	    Collects all paths matching ``extracted_data/amendment_*/summary.json``
	    (relative to the current working directory), processes them in sorted
	    path order, and sleeps two seconds after each file to space out the
	    LLM requests.
	    """
	    # Target ALL amendments
	    pattern = "extracted_data/amendment_*/summary.json"
	    targets = sorted(glob.glob(pattern))
	    total = len(targets)

	    print(f"Found {total} amendment summaries to enrich.")

	    for position, path in enumerate(targets, start=1):
	        print(f"[{position}/{total}] Processing {path}")
	        enrich_amendment_summary(path)

	        # Rate Limit Spacing (Sequential)
	        time.sleep(2)

	# Run the enrichment pass only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	main()