Spaces:
Running
Running
| import re | |
| import os | |
| import json | |
| import glob | |
| import time | |
| import random | |
| import sys | |
| # Ensure project root is in sys.path | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from groq import Groq | |
| from utils.config import settings | |
# Build the shared Groq client once, at import time. A missing API key is a
# configuration error, so abort immediately instead of failing on first use.
if settings.GROQ_API_KEY:
    client = Groq(api_key=settings.GROQ_API_KEY)
else:
    raise ValueError("GROQ_API_KEY is not set.")

# Model name sent with every completion request; the literal is the fallback
# used when settings.GROQ_MODEL is unset or empty.
model = settings.GROQ_MODEL or "llama-3.3-70b-versatile"
def enrich_amendment_summary(file_path):
    """Expand one amendment summary JSON file in place using the LLM.

    Loads the JSON document at ``file_path``, builds a prompt from
    ``metadata["amendment_title"]`` and ``metadata["amendment_description"]``,
    requests an enhanced summary from the module-level ``client``, removes any
    ``<think>``/``<thought>`` blocks from the reply, and writes the document
    back with ``content`` replaced and ``metadata["enriched"]`` set to True.

    The function is best-effort: failures are reported with ``print`` and
    nothing is raised to the caller. The file is only rewritten once a
    non-empty summary has been obtained and serialised successfully.

    Args:
        file_path: Path of the summary JSON file to read and update.

    Returns:
        None.
    """
    max_attempts = 3
    print(f"Processing {file_path}...")
    try:
        # Explicit UTF-8 so reading does not depend on the platform locale.
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        original_desc = data["metadata"].get("amendment_description", "")
        title = data["metadata"].get("amendment_title", "")

        prompt = f"""
        You are a Constitutional Legal Expert. Your task is to EXPAND the summary of a Constitutional Amendment to include specific details about KEY ARTICLES that were added, deleted, or modified.
        Amendment: {title}
        Original Summary: {original_desc}
        Your Goal:
        1. Identify the major articles mentioned (e.g., Article 19, 31, 368, 42nd Amendment changes).
        2. Explicitly state WHAT changed for these articles. Did it delete a Right? Did it add a Duty? Did it change 'internal disturbance' to 'armed rebellion'?
        3. Be precise with Article numbers and Clauses (e.g. 19(1)(f)).
        4. **Identify Cross-Article Effects**: Does the amendment modify an Article that impacts *another* Article? (e.g. "Article 358 restricts Article 19"). Explicitly state if one article suspends or overrides another.
        5. Do NOT hallucinate. Use your knowledge of the Indian Constitution to fill in the semantic details implied by the original summary.
        Output ONLY the Enhanced Summary text. Do not add conversational filler.
        """

        # Retry only on rate-limit errors; any other error aborts this file.
        for attempt in range(max_attempts):
            try:
                chat_completion = client.chat.completions.create(
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant that outputs detailed legal summaries."},
                        {"role": "user", "content": prompt},
                    ],
                    model=model,
                )
                break
            except Exception as e:
                # Rate limits are detected from the error text.
                if "429" in str(e) or "rate_limit" in str(e):
                    # Back off linearly, but do not sleep after the final
                    # attempt: there is no further retry to wait for.
                    if attempt + 1 < max_attempts:
                        wait = (attempt + 1) * 5
                        print(f"Rate Limit hit. Waiting {wait}s...")
                        time.sleep(wait)
                else:
                    print(f"Error calling LLM: {e}")
                    return
        else:
            # Loop finished without ``break``: every attempt was rate-limited.
            print(f"Failed to enrich {file_path} after 3 retries.")
            return

        # The message content may be None; treat that like an empty reply.
        raw_reply = chat_completion.choices[0].message.content or ""

        # Strip thought process if present (e.g. <think>...</think>).
        enhanced_summary = re.sub(r'<think>.*?</think>', '', raw_reply, flags=re.DOTALL)
        enhanced_summary = re.sub(r'<thought>.*?</thought>', '', enhanced_summary, flags=re.DOTALL).strip()

        # Never replace existing content with nothing, and do not flag the
        # file as enriched when the model produced no usable text.
        if not enhanced_summary:
            print(f"Empty summary returned for {file_path}; leaving file unchanged.")
            return

        data["content"] = enhanced_summary
        data["metadata"]["enriched"] = True

        # Serialise before opening for write so a serialisation failure
        # cannot leave a truncated file behind.
        serialized = json.dumps(data, indent=2)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(serialized)
        print(f"Enriched {title}")
    except Exception as e:
        # Top-level boundary for one file: report and let the batch continue.
        print(f"Error processing {file_path}: {e}")
def main():
    """Enrich every amendment summary found under ``extracted_data``.

    Files matching ``extracted_data/amendment_*/summary.json`` (resolved
    against the current working directory) are handled one at a time in
    sorted path order, with a two-second pause after each file to space out
    the API requests.
    """
    # Target ALL amendments.
    paths = glob.glob("extracted_data/amendment_*/summary.json")
    paths.sort()
    total = len(paths)
    print(f"Found {total} amendment summaries to enrich.")

    for position, file_path in enumerate(paths, start=1):
        print(f"[{position}/{total}] Processing {file_path}")
        enrich_amendment_summary(file_path)
        # Sequential spacing between requests to stay under the rate limit.
        time.sleep(2)
# Run the enrichment pass only when this file is executed as a script, so
# importing the module does not start processing files.
if __name__ == "__main__":
    main()