File size: 6,441 Bytes
8d17c17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import asyncio
import functools
import json
import os
import zipfile
from datetime import datetime

import chromadb
import json_repair
from docxtpl import DocxTemplate
from openai import OpenAI
from sentence_transformers import SentenceTransformer
# --- LLM endpoint ----------------------------------------------------------
# The API key is read from the environment; it is None when the
# DEEPSEEK_API_KEY variable is not set.
API_KEY = os.environ.get("DEEPSEEK_API_KEY")
BASE_URL = "https://api.deepseek.com"

# --- Filesystem layout -----------------------------------------------------
TEMPLATES_DIR = "tagged_templates"       # directory the .docx templates are read from
DOWNLOADS_DIR = "downloads"              # directory rendered documents are saved to
REGISTRY_FILE = "templates_registry.json"  # JSON file loaded into `registry`
TAGS_DB_FILE = "tags_db.json"            # JSON file loaded into `tags_db`
DB_PATH = "./legal_db"                   # path given to chromadb.PersistentClient
# System prompts, keyed by pipeline stage. Each value is a str.format()
# template, so literal JSON braces inside a prompt are written as {{ and }}.
#   "router"        -> formatted with docs_list (one line per registry template)
#   "ner_extractor" -> formatted with schema (one line per tag to extract)
#   "consultant"    -> formatted with context (text retrieved from the vector DB)
PROMPTS = {
    "router": """
You are a Legal Document Dispatcher. Your goal is to identify the most suitable document template from the list below based on the user's request.
AVAILABLE TEMPLATES:
{docs_list}

INSTRUCTION:
Return ONLY a JSON object: {{"filename": "exact_name.docx"}}
If no suitable template is found, return: {{"filename": null}}
""",

    "ner_extractor": """
You are a Legal Data Extraction specialist. Your task is to extract entity information from the user's query into a structured JSON format.
DATE FORMAT: dd.mm.yyyy
REQUIRED SCHEMA:
{schema}
""",

    "consultant": """
You are LexGuard AI, a professional legal assistant specializing in EU Law and GDPR. 
Provide accurate, structured, and formal legal advice based on the provided context.

GUIDELINES:
1. CITATIONS: Always mention specific GDPR Articles or Recitals if they are present in the context.
2. LIMITATIONS: If the context doesn't contain the answer, use your general knowledge of EU Law but clearly state it is general information.
3. STRUCTURE: Use Markdown (bolding, bullet points) for clarity.
4. TONE: Professional, objective, and helpful.

GDPR DATABASE CONTEXT:
{context}
"""
}
# Single LLM client shared by every chat-completion call in this module.
client = OpenAI(base_url=BASE_URL, api_key=API_KEY)

# RAG components: both remain None when the initialisation below fails.
encoder = None
collection = None

try:
    encoder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
    chroma_client = chromadb.PersistentClient(path=DB_PATH)
    collection = chroma_client.get_collection(name="laws")
    print("✅ ChromaDB and Encoder initialized")
except Exception as rag_error:
    print(f"⚠️ RAG initialization error: {rag_error}")

# Template registry and tag database. Tag keys starting with "_" are left out
# of clean_tags_db. Any failure falls back to an empty registry and tag set.
try:
    with open(REGISTRY_FILE, "r", encoding="utf-8") as registry_fh:
        registry = json.load(registry_fh)
    with open(TAGS_DB_FILE, "r", encoding="utf-8") as tags_fh:
        tags_db = json.load(tags_fh)
    clean_tags_db = dict(
        entry for entry in tags_db.items() if not entry[0].startswith("_")
    )
except Exception as config_error:
    print(f"⚠️ Config files loading error: {config_error}")
    registry, clean_tags_db = [], {}


async def select_best_template(user_query):
    """Identify the best document template for a request using LLM reasoning.

    The model is shown every ``filename`` / ``description`` pair from the
    module-level ``registry`` and is asked to reply with a JSON object of the
    form ``{"filename": ...}``.

    Args:
        user_query: Free-form user request describing the wanted document.

    Returns:
        The chosen filename, exactly as it appears in ``registry``, or
        ``None`` when the model selects nothing, names a file that is not in
        the registry, or the request/parsing fails.
    """
    docs_list = "\n".join(
        f"- {item['filename']} ({item.get('description', '')})" for item in registry
    )
    known_filenames = {item["filename"] for item in registry}

    try:
        # The OpenAI client is synchronous: run it in the default executor so
        # the event loop is not blocked for the duration of the HTTP request.
        request = functools.partial(
            client.chat.completions.create,
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": PROMPTS["router"].format(docs_list=docs_list)},
                {"role": "user", "content": user_query},
            ],
            response_format={"type": "json_object"},
            temperature=0.0,
        )
        response = await asyncio.get_running_loop().run_in_executor(None, request)
        result = json_repair.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"⚠️ Router Error: {e}")
        return None

    filename = result.get("filename") if isinstance(result, dict) else None
    if filename is None:
        return None

    # The answer is model output shaped by user input: accept it only when it
    # is one of the registered names, so a hallucinated or path-like value
    # never reaches the filesystem.
    if not isinstance(filename, str) or filename not in known_filenames:
        print(f"⚠️ Router Error: model returned unknown template {filename!r}")
        return None
    return filename


async def extract_data_from_chat(user_query, filename):
    """Extract the data fields for a document from the user's request.

    The extraction schema lists the ``tag`` and ``description`` of every
    entry in the module-level ``clean_tags_db``.

    Args:
        user_query: Free-form user request to extract values from.
        filename: Name of the selected template. Currently unused: the schema
            is not filtered per template. Kept for interface compatibility.

    Returns:
        A dict with the extracted values. An empty dict is returned when the
        request fails or the model's answer is not a JSON object.
    """
    schema = "\n".join(
        f"- {spec['tag']}: {spec['description']}" for spec in clean_tags_db.values()
    )

    try:
        # The OpenAI client is synchronous: run it in the default executor so
        # the event loop is not blocked for the duration of the HTTP request.
        request = functools.partial(
            client.chat.completions.create,
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": PROMPTS["ner_extractor"].format(schema=schema)},
                {"role": "user", "content": user_query},
            ],
            response_format={"type": "json_object"},
            temperature=0.1,
        )
        response = await asyncio.get_running_loop().run_in_executor(None, request)
        data = json_repair.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"⚠️ Extraction Error: {e}")
        return {}

    # json_repair can yield a list or a string for malformed output; callers
    # treat the result as a mapping, so anything else counts as a failure.
    if not isinstance(data, dict):
        print(f"⚠️ Extraction Error: expected a JSON object, got {type(data).__name__}")
        return {}
    return data


def _retrieve_context(user_text):
    """Return the top 3 matching documents joined into one string, or None if none were found."""
    vec = encoder.encode(user_text).tolist()
    res = collection.query(query_embeddings=[vec], n_results=3)
    if res['documents'] and res['documents'][0]:
        return "\n---\n".join(res['documents'][0])
    return None


async def consult_logic(user_text):
    """Answer a legal question, using retrieved database context when available.

    When both the encoder and the ChromaDB collection were initialised, the
    question is embedded and the best-matching documents are placed in the
    system prompt. Otherwise, or when the search fails or finds nothing, a
    placeholder context is used and the model answers without it.

    Args:
        user_text: The user's question.

    Returns:
        ``{"type": "text", "content": ...}`` where ``content`` is the model's
        answer, or a connection-error message when the LLM call fails.
    """
    context = "No specific articles found in the database."
    loop = asyncio.get_running_loop()

    # RAG: Retrieve context from ChromaDB. Encoding and querying are blocking
    # calls, so they run in the default executor instead of the event loop.
    if collection is not None and encoder is not None:
        try:
            found = await loop.run_in_executor(None, _retrieve_context, user_text)
            if found is not None:
                context = found
        except Exception as e:
            print(f"⚠️ Vector Search Error: {e}")

    try:
        # The OpenAI client is synchronous: keep it off the event loop too.
        request = functools.partial(
            client.chat.completions.create,
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": PROMPTS["consultant"].format(context=context)},
                {"role": "user", "content": f"User Question: {user_text}"},
            ],
            temperature=0.3,
        )
        response = await loop.run_in_executor(None, request)
        return {"type": "text", "content": response.choices[0].message.content}
    except Exception as e:
        return {"type": "text", "content": f"⚠️ Connection Error: {str(e)}"}


def _render_to_file(template_path, data, out_path):
    """Render the .docx template with *data* and save the result to *out_path*."""
    doc = DocxTemplate(template_path)
    doc.render(data)
    os.makedirs(DOWNLOADS_DIR, exist_ok=True)
    doc.save(out_path)


async def generate_doc_logic(user_text):
    """Handle the document generation pipeline (currently in development).

    Steps: choose a template, extract the field values from the request,
    render the template and save it under ``DOWNLOADS_DIR``.

    Args:
        user_text: The user's request describing the document to generate.

    Returns:
        On success ``{"type": "file", "content": ..., "file_url": <saved path>}``.
        When no template matches, the result of ``consult_logic`` with a
        notice prepended to its content. Otherwise
        ``{"type": "text", "content": <error message>}``.
    """
    best_filename = await select_best_template(user_text)

    if not best_filename:
        fallback = await consult_logic(f"Draft a response for: {user_text}")
        fallback["content"] = "⚠️ **No matching template found.** Here is a manual draft:\n\n" + fallback["content"]
        return fallback

    not_found = {"type": "text", "content": f"⚠️ Template file '{best_filename}' not found on server."}

    # The name originates from model output: refuse anything that is not a
    # bare file name so it cannot escape TEMPLATES_DIR / DOWNLOADS_DIR.
    if not isinstance(best_filename, str) or os.path.basename(best_filename) != best_filename:
        return not_found

    template_path = os.path.join(TEMPLATES_DIR, best_filename)
    if not os.path.isfile(template_path):
        return not_found

    data = await extract_data_from_chat(user_text, best_filename)
    if not isinstance(data, dict):
        # Rendering needs a mapping; fall back to an empty context.
        data = {}
    # Default the document date to today unless the extractor supplied one.
    data.setdefault("doc_date", datetime.now().strftime("%d.%m.%Y"))

    try:
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        out_name = f"LexGuard_{ts}_{best_filename}"
        out_path = os.path.join(DOWNLOADS_DIR, out_name)

        # Rendering and saving are blocking; run them in the default executor.
        await asyncio.get_running_loop().run_in_executor(
            None, _render_to_file, template_path, data, out_path
        )

        return {
            "type": "file",
            "content": f"✅ Document successfully generated using template: **{best_filename}**",
            "file_url": out_path
        }
    except Exception as e:
        return {"type": "text", "content": f"⚠️ Generation error: {e}"}