"""Enrich constitutional-amendment summaries with article-level detail via Groq.

For every file matching ``extracted_data/amendment_*/summary.json`` (resolved
against the current working directory) the script sends the amendment title and
its original description to a Groq chat model, stores the model's expanded
summary under the top-level ``content`` key and sets ``metadata.enriched`` to
``True`` in the same file.

Importing this module raises ``ValueError`` when ``settings.GROQ_API_KEY`` is
empty, and creates the shared Groq client.
"""

import re
import os
import json
import glob
import time
import random
import sys

# Make the parent of this file's directory importable so that ``utils`` resolves.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from groq import Groq
from utils.config import settings

# Initialize Groq
if not settings.GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is not set.")

client = Groq(api_key=settings.GROQ_API_KEY)
model = settings.GROQ_MODEL or "llama-3.3-70b-versatile"

# Number of times a rate-limited request is attempted before giving up.
MAX_ATTEMPTS = 3
# The wait after the n-th rate-limited attempt is n * RETRY_BACKOFF_SECONDS.
RETRY_BACKOFF_SECONDS = 5
# Pause between consecutive files in ``main`` to space out API calls.
INTER_FILE_DELAY_SECONDS = 2

# Matches a model "reasoning" block together with its tags.
# NOTE(review): the original patterns were r'.*?', which only ever match the
# empty string, so nothing was stripped. The tag names below are an assumption
# about what the configured model emits -- confirm against real output.
_REASONING_BLOCK = re.compile(
    r"<(think|thinking)>.*?</\1>", flags=re.DOTALL | re.IGNORECASE
)


def _build_prompt(title, original_desc):
    """Return the user prompt asking the model to expand one amendment summary."""
    return f"""
You are a Constitutional Legal Expert.
Your task is to EXPAND the summary of a Constitutional Amendment to include specific details about KEY ARTICLES that were added, deleted, or modified.

Amendment: {title}
Original Summary: {original_desc}

Your Goal:
1. Identify the major articles mentioned (e.g., Article 19, 31, 368, 42nd Amendment changes).
2. Explicitly state WHAT changed for these articles. Did it delete a Right? Did it add a Duty? Did it change 'internal disturbance' to 'armed rebellion'?
3. Be precise with Article numbers and Clauses (e.g. 19(1)(f)).
4. **Identify Cross-Article Effects**: Does the amendment modify an Article that impacts *another* Article? (e.g. "Article 358 restricts Article 19"). Explicitly state if one article suspends or overrides another.
5. Do NOT hallucinate. Use your knowledge of the Indian Constitution to fill in the semantic details implied by the original summary.

Output ONLY the Enhanced Summary text. Do not add conversational filler.
"""


def _request_completion(prompt, file_path):
    """Send ``prompt`` to the chat model, retrying on rate-limit errors.

    Args:
        prompt: User message to send.
        file_path: Path of the file being enriched; used only in log output.

    Returns:
        The chat-completion response, or ``None`` when a non-rate-limit error
        occurred or every attempt was rate limited.
    """
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            return client.chat.completions.create(
                messages=[
                    {
                        "role": "system",
                        "content": "You are a helpful assistant that outputs detailed legal summaries.",
                    },
                    {"role": "user", "content": prompt},
                ],
                model=model,
            )
        except Exception as e:
            # Rate limiting is recognised from the error text; anything else is
            # treated as non-retryable and the file is skipped.
            message = str(e)
            if "429" not in message and "rate_limit" not in message:
                print(f"Error calling LLM: {e}")
                return None
            # Sleeping after the final attempt would only delay the failure.
            if attempt < MAX_ATTEMPTS:
                wait = attempt * RETRY_BACKOFF_SECONDS
                print(f"Rate Limit hit. Waiting {wait}s...")
                time.sleep(wait)

    print(f"Failed to enrich {file_path} after {MAX_ATTEMPTS} retries.")
    return None


def _strip_reasoning(text):
    """Remove tagged reasoning blocks from ``text`` and trim surrounding whitespace."""
    return _REASONING_BLOCK.sub("", text).strip()


def enrich_amendment_summary(file_path):
    """Expand the summary stored in one amendment JSON file, in place.

    Reads ``metadata.amendment_title`` and ``metadata.amendment_description``
    from the file, asks the model for an expanded summary, then rewrites the
    file with the result under ``content`` and ``metadata.enriched = True``.

    This is best-effort: any failure (unreadable file, missing ``metadata``,
    API error, empty model response) is printed and the function returns
    without raising. The file is only rewritten when a non-empty summary was
    obtained.

    Args:
        file_path: Path to a ``summary.json`` file.

    Returns:
        None.
    """
    print(f"Processing {file_path}...")
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        metadata = data["metadata"]
        original_desc = metadata.get("amendment_description", "")
        title = metadata.get("amendment_title", "")

        chat_completion = _request_completion(
            _build_prompt(title, original_desc), file_path
        )
        if chat_completion is None:
            return

        # ``content`` may be None; treat that the same as an empty reply.
        raw_summary = chat_completion.choices[0].message.content or ""
        enhanced_summary = _strip_reasoning(raw_summary)
        if not enhanced_summary:
            # Never replace existing content with nothing or flag it enriched.
            print(f"Empty summary returned for {file_path}; leaving file unchanged.")
            return

        # Update the JSON
        data["content"] = enhanced_summary
        metadata["enriched"] = True

        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        print(f"Enriched {title}")

    except Exception as e:
        # Per-file boundary: report and let the caller continue with the next file.
        print(f"Error processing {file_path}: {e}")


def main():
    """Enrich every amendment summary found under ``extracted_data``, in sorted order."""
    # Target ALL amendments
    summary_files = sorted(glob.glob("extracted_data/amendment_*/summary.json"))
    total = len(summary_files)
    print(f"Found {total} amendment summaries to enrich.")

    for position, file_path in enumerate(summary_files, start=1):
        print(f"[{position}/{total}] Processing {file_path}")
        enrich_amendment_summary(file_path)
        # Rate Limit Spacing (Sequential)
        time.sleep(INTER_FILE_DELAY_SECONDS)


if __name__ == "__main__":
    main()