Spaces:
Sleeping
Sleeping
import json
import re
import time
from typing import Optional

from groq import Groq

from utils.config import settings
class ContextOrganizationService:
    """Organize retrieved article chunks and graph edges into prompt context.

    The service owns a Groq chat client (used by ``_get_json_response``) and a
    purely deterministic formatter (``organize_context``) that makes no LLM
    calls.
    """

    # Graph relationship labels that mark the target article as removed.
    _REPEAL_RELATIONSHIPS = ("DELETES_CLAUSE", "REPEALS", "DELETES")
    # Maximum number of characters of chunk text kept per formatted entry.
    _MAX_CHUNK_CHARS = 2000
    # System prompt shared by the JSON-mode and text-mode requests.
    _SYSTEM_PROMPT = "You are a helpful assistant that outputs JSON."

    def __init__(self):
        """Create the Groq client from ``settings``.

        Raises:
            ValueError: if ``settings.GROQ_API_KEY`` is empty or unset.
        """
        if not settings.GROQ_API_KEY:
            raise ValueError("GROQ_API_KEY is not set.")
        self.provider = "groq"
        self.client = Groq(api_key=settings.GROQ_API_KEY)
        self.model_name = settings.GROQ_MODEL
        print(f"ContextOrganizationService initialized with Groq model: {self.model_name}")

    @staticmethod
    def _parse_json_text(content: str):
        """Parse JSON from *content*, unwrapping a Markdown code fence if present."""
        fenced = re.search(r"```(?:json)?(.*?)```", content, re.DOTALL)
        if fenced:
            content = fenced.group(1)
        return json.loads(content.strip())

    @staticmethod
    def _chunk_metadata(chunk: dict) -> dict:
        """Return the chunk's metadata dict, or ``{}`` when missing or ``None``."""
        return chunk.get("metadata") or {}

    @staticmethod
    def _comparable_year(raw) -> int:
        """Coerce a raw ``year`` value to an int usable for ordering.

        Missing, falsy or non-numeric values become ``0`` so that chunks with
        mixed ``int``/``str`` years can still be compared and sorted.
        """
        try:
            return int(raw or 0)
        except (TypeError, ValueError):
            return 0

    def _get_json_response(
        self,
        prompt: str,
        max_attempts: int = 3,
        retry_delay: float = 2.0,
    ) -> dict:
        """Ask the model for a JSON answer, retrying on failure.

        Each attempt first requests strict JSON mode
        (``response_format={"type": "json_object"}``). If that request fails
        with an error whose text contains ``"400"`` or
        ``"json_validate_failed"``, the same attempt falls back to a plain
        text request and strips any Markdown code fence before parsing.

        Args:
            prompt: User prompt sent to the model.
            max_attempts: Total number of attempts before giving up.
            retry_delay: Seconds to sleep between failed attempts.

        Returns:
            The parsed JSON value, or ``{}`` when every attempt failed.
        """
        system_message = {"role": "system", "content": self._SYSTEM_PROMPT}

        for attempt in range(max_attempts):
            try:
                try:
                    # Primary strategy: strict JSON mode.
                    completion = self.client.chat.completions.create(
                        messages=[
                            system_message,
                            {"role": "user", "content": prompt},
                        ],
                        model=self.model_name,
                        temperature=0,
                        response_format={"type": "json_object"},
                    )
                    return json.loads(completion.choices[0].message.content.strip())
                except Exception as api_err:
                    error_text = str(api_err)
                    if "400" not in error_text and "json_validate_failed" not in error_text:
                        # Not a JSON-mode rejection: let the retry handler deal with it.
                        raise

                    # Fallback strategy: text mode, then parse the reply ourselves.
                    print(f"JSON Mode failed. Falling back to Text Mode (Attempt {attempt+1})...")
                    completion = self.client.chat.completions.create(
                        messages=[
                            system_message,
                            {
                                "role": "user",
                                "content": prompt + "\n\nOUTPUT RAW JSON ONLY. NO MARKDOWN.",
                            },
                        ],
                        model=self.model_name,
                        temperature=0,
                    )
                    return self._parse_json_text(completion.choices[0].message.content)
            except Exception as e:
                # Best-effort boundary: report, pause, and try again.
                print(f"Error in LLM request (Attempt {attempt+1}): {e}")
                if attempt < max_attempts - 1:
                    time.sleep(retry_delay)
        return {}

    def organize_context(self, query: str, chunks: list, graph_data: Optional[list] = None) -> dict:
        """Deterministically order and label chunks for use as context (no LLM).

        Chunks are sorted by article number (as a string) and then by year.
        Each chunk is given a source label that is flagged either as
        repealed/deleted (according to ``graph_data``) or as the current
        active law (when it carries the latest year seen for its article).
        The input chunks and their metadata are not modified.

        Args:
            query: Accepted for interface compatibility; not used here.
            chunks: Dicts with optional ``"text"`` and ``"metadata"`` keys.
                Metadata keys read: ``article_number``, ``year``,
                ``amendment_number``.
            graph_data: Optional edge dicts. Keys read: ``relationship`` or
                ``change_type``, ``target_id`` or ``related_article``, and
                ``amendment``.

        Returns:
            A dict with ``organized_chunks`` (list of ``{"source", "content"}``),
            ``graph_data`` (the input edges, or ``[]``) and ``meta_info``.
        """
        chunks = chunks or []

        # 1. Latest year per article, keyed by the *string* article number so
        #    the lookup in step 4 (which also uses str) actually matches.
        latest_year_per_article = {}
        for chunk in chunks:
            meta = self._chunk_metadata(chunk)
            article = meta.get("article_number")
            if not article:
                continue
            key = str(article)
            year = self._comparable_year(meta.get("year"))
            if key not in latest_year_per_article or year > latest_year_per_article[key]:
                latest_year_per_article[key] = year

        # 2. Articles that the graph marks as repealed or deleted.
        repealed_articles = {}
        for edge in graph_data or []:
            rel = edge.get("relationship", "") or edge.get("change_type", "")
            if rel not in self._REPEAL_RELATIONSHIPS:
                continue
            target = edge.get("target_id")
            if target is None or target == "":
                # A present-but-empty target_id must not mask the fallback key.
                target = edge.get("related_article")
            if target is None or target == "":
                continue
            repealed_articles[str(target)] = edge.get("amendment", "Unknown")

        # 3. Sort by article, then year, for chronological flow per article.
        def sort_key(chunk):
            meta = self._chunk_metadata(chunk)
            return (
                str(meta.get("article_number", "0")),
                self._comparable_year(meta.get("year")),
            )

        sorted_chunks = sorted(chunks, key=sort_key)

        # 4. Build the labelled entries.
        formatted_chunks = []
        for chunk in sorted_chunks:
            meta = self._chunk_metadata(chunk)
            article = str(meta.get("article_number", "?"))
            year = meta.get("year", 0) or 0  # shown as provided in the label

            status_flag = ""
            for target_id, amd_num in repealed_articles.items():
                # Match the article itself or a clause of it, e.g. "19" vs "19(1)".
                if (
                    article == target_id
                    or article.startswith(target_id + "(")
                    or target_id.startswith(article + "(")
                ):
                    status_flag = f" [REPEALED/DELETED BY AMD {amd_num}]"
                    break
            else:
                # Not repealed: flag it when it is the newest version we hold.
                if (
                    article in latest_year_per_article
                    and self._comparable_year(year) == latest_year_per_article[article]
                ):
                    status_flag = " [CURRENT ACTIVE LAW]"

            ref = (
                f"Article {article} (Amd {meta.get('amendment_number', 'Original')}, "
                f"Year {year}){status_flag}"
            )
            formatted_chunks.append({
                "source": ref,
                # Cap text length; tolerate a missing or None text field.
                "content": (chunk.get("text") or "")[:self._MAX_CHUNK_CHARS],
            })

        # 5. Graph data is passed through unchanged (empty list when absent).
        clean_graph = graph_data or []

        return {
            "organized_chunks": formatted_chunks,
            "graph_data": clean_graph,
            "meta_info": {
                "total_chunks": len(chunks),
                "strategy": "Deterministic Chronological",
            },
        }