LexGuard-EU / api.py
TeanShow's picture
Upload 8 files
8d17c17 verified
import asyncio
import json
import os
import zipfile
from datetime import datetime

import chromadb
import json_repair
from docxtpl import DocxTemplate
from openai import OpenAI
from sentence_transformers import SentenceTransformer
# --- Runtime configuration ---------------------------------------------------
# DeepSeek credentials come from the environment; API_KEY is None when the
# DEEPSEEK_API_KEY variable is not set.
API_KEY = os.environ.get("DEEPSEEK_API_KEY")
BASE_URL = "https://api.deepseek.com"

# Directory holding the .docx templates, and directory receiving generated files.
TEMPLATES_DIR, DOWNLOADS_DIR = "tagged_templates", "downloads"

# JSON files loaded at import time: the template registry and the tag definitions.
REGISTRY_FILE, TAGS_DB_FILE = "templates_registry.json", "tags_db.json"

# Location of the persistent ChromaDB store queried during consultations.
DB_PATH = "./legal_db"
# System prompt for template routing. Filled with str.format(docs_list=...);
# the doubled braces survive formatting as literal JSON braces.
_ROUTER_PROMPT = """
You are a Legal Document Dispatcher. Your goal is to identify the most suitable document template from the list below based on the user's request.
AVAILABLE TEMPLATES:
{docs_list}
INSTRUCTION:
Return ONLY a JSON object: {{"filename": "exact_name.docx"}}
If no suitable template is found, return: {{"filename": null}}
"""

# System prompt for field extraction. Filled with str.format(schema=...).
_NER_EXTRACTOR_PROMPT = """
You are a Legal Data Extraction specialist. Your task is to extract entity information from the user's query into a structured JSON format.
DATE FORMAT: dd.mm.yyyy
REQUIRED SCHEMA:
{schema}
"""

# System prompt for the RAG consultation. Filled with str.format(context=...).
_CONSULTANT_PROMPT = """
You are LexGuard AI, a professional legal assistant specializing in EU Law and GDPR.
Provide accurate, structured, and formal legal advice based on the provided context.
GUIDELINES:
1. CITATIONS: Always mention specific GDPR Articles or Recitals if they are present in the context.
2. LIMITATIONS: If the context doesn't contain the answer, use your general knowledge of EU Law but clearly state it is general information.
3. STRUCTURE: Use Markdown (bolding, bullet points) for clarity.
4. TONE: Professional, objective, and helpful.
GDPR DATABASE CONTEXT:
{context}
"""

# Prompt templates keyed by the pipeline stage that uses them.
PROMPTS = {
    "router": _ROUTER_PROMPT,
    "ner_extractor": _NER_EXTRACTOR_PROMPT,
    "consultant": _CONSULTANT_PROMPT,
}
# Shared client for the DeepSeek endpoint, accessed through the OpenAI SDK.
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)

# RAG components. Both stay None when initialization fails; consult_logic
# checks them before attempting a vector search.
collection = encoder = None
try:
    encoder = SentenceTransformer("paraphrase-multilingual-mpnet-base-v2")
    chroma_client = chromadb.PersistentClient(path=DB_PATH)
    collection = chroma_client.get_collection(name="laws")
    print("✅ ChromaDB and Encoder initialized")
except Exception as exc:
    # Best effort: the module still loads, just without retrieval.
    print(f"⚠️ RAG initialization error: {exc}")

# Template registry and tag definitions. Tag entries whose key starts with an
# underscore are dropped from the working copy.
try:
    with open(REGISTRY_FILE, "r", encoding="utf-8") as f:
        registry = json.loads(f.read())
    with open(TAGS_DB_FILE, "r", encoding="utf-8") as f:
        tags_db = json.loads(f.read())
    clean_tags_db = {
        tag_key: tag_def
        for tag_key, tag_def in tags_db.items()
        if not tag_key.startswith("_")
    }
except Exception as exc:
    print(f"⚠️ Config files loading error: {exc}")
    # Fall back to empty configuration so the functions below remain callable.
    registry = []
    clean_tags_db = {}
async def select_best_template(user_query):
    """Pick the registered template that best matches the user's request.

    The registry entries are rendered into the router prompt and the model is
    asked to answer with a JSON object of the form ``{"filename": ...}``.

    Args:
        user_query: Free-text request from the user.

    Returns:
        A filename that appears in ``registry``, or ``None`` when the registry
        is empty, the model selects nothing, the model names a file that is
        not registered, or the request fails.
    """
    if not registry:
        # Nothing to choose from; skip the API round-trip.
        return None

    docs_list = "\n".join(
        f"- {item['filename']} ({item.get('description', '')})" for item in registry
    )
    # Only names that were actually offered to the model are valid answers.
    known_filenames = {item["filename"] for item in registry}

    def _ask_router():
        # Synchronous SDK call; run in a worker thread by the caller below.
        return client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": PROMPTS["router"].format(docs_list=docs_list)},
                {"role": "user", "content": user_query},
            ],
            response_format={"type": "json_object"},
            temperature=0.0,
        )

    try:
        # The SDK client is blocking; calling it directly inside a coroutine
        # would stall the event loop for the whole network round-trip.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, _ask_router)
        result = json_repair.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"⚠️ Router Error: {e}")
        return None

    # json_repair may yield a list or a string when the reply is not an object.
    filename = result.get("filename") if isinstance(result, dict) else None
    if filename is None:
        return None
    if not isinstance(filename, str) or filename not in known_filenames:
        # Model output is untrusted: never hand an unregistered (possibly
        # path-like) name to the filesystem code downstream.
        print(f"⚠️ Router Error: model returned unregistered template {filename!r}")
        return None
    return filename
async def extract_data_from_chat(user_query, filename):
    """Extract the template fields from the user's request via the LLM.

    The schema sent to the model lists the ``tag`` and ``description`` of every
    entry in ``clean_tags_db``.

    Args:
        user_query: Free-text request from the user.
        filename: Name of the selected template.
            NOTE(review): currently unused - the schema is not narrowed to the
            tags of this template; confirm whether that is intended.

    Returns:
        A dict of extracted values. An empty dict is returned when the request
        fails or the model's reply is not a JSON object.
    """
    schema = "\n".join(
        f"- {spec['tag']}: {spec['description']}" for spec in clean_tags_db.values()
    )

    def _ask_extractor():
        # Synchronous SDK call; run in a worker thread by the caller below.
        return client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": PROMPTS["ner_extractor"].format(schema=schema)},
                {"role": "user", "content": user_query},
            ],
            response_format={"type": "json_object"},
            temperature=0.1,
        )

    try:
        # Keep the event loop free while the blocking HTTP request is running.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, _ask_extractor)
        extracted = json_repair.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"⚠️ Extraction Error: {e}")
        return {}

    if not isinstance(extracted, dict):
        # Callers treat the result as a mapping (membership test and item
        # assignment), so a repaired list/string must not leak out.
        print(f"⚠️ Extraction Error: expected a JSON object, got {type(extracted).__name__}")
        return {}
    return extracted
def _search_legal_context(user_text):
    """Return up to three stored passages for *user_text* joined by '---' lines.

    Returns ``None`` when the vector store or encoder is unavailable or the
    query yields no documents. Synchronous; meant to run in a worker thread.
    """
    if collection is None or encoder is None:
        return None
    vec = encoder.encode(user_text).tolist()
    res = collection.query(query_embeddings=[vec], n_results=3)
    documents = res['documents']
    if documents and documents[0]:
        return "\n---\n".join(documents[0])
    return None


async def consult_logic(user_text):
    """Answer a legal question, grounding the reply in retrieved context when available.

    Args:
        user_text: The user's question.

    Returns:
        ``{"type": "text", "content": ...}`` where ``content`` is the model's
        answer, or a warning message containing the error text if the LLM
        request fails. A failed vector search is logged and the answer is
        produced with the default "nothing found" context instead.
    """
    context = "No specific articles found in the database."
    loop = asyncio.get_running_loop()

    # RAG: retrieve context from ChromaDB. Encoding and querying are blocking,
    # so they run in a worker thread rather than on the event loop.
    try:
        found = await loop.run_in_executor(None, _search_legal_context, user_text)
        if found is not None:
            context = found
    except Exception as e:
        print(f"⚠️ Vector Search Error: {e}")

    def _ask_consultant():
        # Synchronous SDK call; run in a worker thread by the caller below.
        return client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": PROMPTS["consultant"].format(context=context)},
                {"role": "user", "content": f"User Question: {user_text}"},
            ],
            temperature=0.3,
        )

    try:
        response = await loop.run_in_executor(None, _ask_consultant)
        return {"type": "text", "content": response.choices[0].message.content}
    except Exception as e:
        return {"type": "text", "content": f"⚠️ Connection Error: {str(e)}"}
async def generate_doc_logic(user_text):
    """Run the document generation pipeline (marked as still in development).

    Steps:
      1. Ask ``select_best_template`` for a template. If none is chosen, return
         a ``consult_logic`` draft prefixed with a "no template" warning.
      2. Resolve the template inside ``TEMPLATES_DIR``.
      3. Extract field values with ``extract_data_from_chat``; ``doc_date``
         defaults to today's date (dd.mm.yyyy) when the key is absent.
      4. Render the template and save it in ``DOWNLOADS_DIR`` as
         ``LexGuard_<timestamp>_<template name>``.

    Args:
        user_text: The user's request.

    Returns:
        ``{"type": "file", "content": ..., "file_url": <saved path>}`` on
        success, otherwise ``{"type": "text", "content": <message>}``.
    """
    best_filename = await select_best_template(user_text)
    if not best_filename:
        fallback = await consult_logic(f"Draft a response for: {user_text}")
        fallback["content"] = "⚠️ **No matching template found.** Here is a manual draft:\n\n" + fallback["content"]
        return fallback

    # The name originates from model output. Accept only a bare file name so
    # that neither the template read nor the output path can leave its folder.
    is_bare_name = (
        isinstance(best_filename, str)
        and os.path.basename(best_filename) == best_filename
    )
    template_path = os.path.join(TEMPLATES_DIR, best_filename) if is_bare_name else None
    # isfile (not exists): a directory such as ".." is not a usable template.
    if template_path is None or not os.path.isfile(template_path):
        return {"type": "text", "content": f"⚠️ Template file '{best_filename}' not found on server."}

    data = await extract_data_from_chat(user_text, best_filename)
    if not isinstance(data, dict):
        # Rendering needs a mapping; treat anything else as "nothing extracted".
        data = {}
    if "doc_date" not in data:
        data["doc_date"] = datetime.now().strftime("%d.%m.%Y")

    try:
        doc = DocxTemplate(template_path)
        doc.render(data)
        os.makedirs(DOWNLOADS_DIR, exist_ok=True)
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        out_name = f"LexGuard_{ts}_{best_filename}"
        out_path = os.path.join(DOWNLOADS_DIR, out_name)
        doc.save(out_path)
        return {
            "type": "file",
            "content": f"✅ Document successfully generated using template: **{best_filename}**",
            "file_url": out_path
        }
    except Exception as e:
        return {"type": "text", "content": f"⚠️ Generation error: {e}"}