TeanShow committed on
Commit
8d17c17
·
verified ·
1 Parent(s): 2c137a9

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ legal_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
api.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import zipfile
4
+ import json_repair
5
+ from docxtpl import DocxTemplate
6
+ from openai import OpenAI
7
+ from datetime import datetime
8
+ import chromadb
9
+ from sentence_transformers import SentenceTransformer
10
# DeepSeek API key, read from the environment; os.getenv yields None when the
# variable is not set.
API_KEY = os.getenv("DEEPSEEK_API_KEY")
# Base URL handed to the OpenAI-compatible client.
BASE_URL = "https://api.deepseek.com"
# Directory that template filenames are resolved against.
TEMPLATES_DIR = "tagged_templates"
# Directory that generated documents are saved into.
DOWNLOADS_DIR = "downloads"
# JSON file loaded into `registry` (the list of available templates).
REGISTRY_FILE = "templates_registry.json"
# JSON file loaded into `tags_db` (tag definitions used to build the schema).
TAGS_DB_FILE = "tags_db.json"
# Path passed to the persistent ChromaDB client.
DB_PATH = "./legal_db"
17
# System-prompt templates, filled in with str.format() by the functions below.
# Each template has exactly one placeholder:
#   router        -> {docs_list}
#   ner_extractor -> {schema}
#   consultant    -> {context}
# Literal JSON braces in the router prompt are doubled ({{ }}) so that
# str.format() emits them verbatim instead of treating them as fields.
PROMPTS = {
    # Picks one template filename (or null) for a user request.
    "router": """
You are a Legal Document Dispatcher. Your goal is to identify the most suitable document template from the list below based on the user's request.
AVAILABLE TEMPLATES:
{docs_list}

INSTRUCTION:
Return ONLY a JSON object: {{"filename": "exact_name.docx"}}
If no suitable template is found, return: {{"filename": null}}
""",

    # Extracts entity values from the user's query into JSON.
    "ner_extractor": """
You are a Legal Data Extraction specialist. Your task is to extract entity information from the user's query into a structured JSON format.
DATE FORMAT: dd.mm.yyyy
REQUIRED SCHEMA:
{schema}
""",

    # Persona and guidelines for the consultation answer; {context} receives
    # the retrieved passages (or a "nothing found" notice).
    "consultant": """
You are LexGuard AI, a professional legal assistant specializing in EU Law and GDPR.
Provide accurate, structured, and formal legal advice based on the provided context.

GUIDELINES:
1. CITATIONS: Always mention specific GDPR Articles or Recitals if they are present in the context.
2. LIMITATIONS: If the context doesn't contain the answer, use your general knowledge of EU Law but clearly state it is general information.
3. STRUCTURE: Use Markdown (bolding, bullet points) for clarity.
4. TONE: Professional, objective, and helpful.

GDPR DATABASE CONTEXT:
{context}
"""
}
49
# OpenAI-compatible client pointed at the DeepSeek endpoint.
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)

# Retrieval components. They stay None when initialisation fails, and
# consult_logic only performs retrieval when both are set.
collection = None
encoder = None

try:
    encoder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
    chroma_client = chromadb.PersistentClient(path=DB_PATH)
    collection = chroma_client.get_collection(name="laws")
    print("✅ ChromaDB and Encoder initialized")
except Exception as e:
    # Best effort: the app keeps running without retrieval.
    print(f"⚠️ RAG initialization error: {e}")

# Safe defaults so the rest of the module works even if a file is unusable.
registry = []
clean_tags_db = {}

# The two config files are loaded independently so that a problem with one
# does not throw away the successfully loaded contents of the other.
try:
    with open(REGISTRY_FILE, "r", encoding="utf-8") as f:
        registry = json.load(f)
except Exception as e:
    print(f"⚠️ Config files loading error: {e}")

try:
    with open(TAGS_DB_FILE, "r", encoding="utf-8") as f:
        tags_db = json.load(f)
    # Keys starting with "_" are excluded from the working tag set.
    clean_tags_db = {k: v for k, v in tags_db.items() if not k.startswith("_")}
except Exception as e:
    print(f"⚠️ Config files loading error: {e}")
69
+
70
+
71
async def select_best_template(user_query):
    """Ask the LLM which registered template best matches the request.

    Args:
        user_query: The user's free-text request.

    Returns:
        The filename of a template listed in ``registry``, or ``None`` when
        the registry has no usable entries, the model declines, the model
        answers with a name that is not in the registry, or the call fails.
    """
    import asyncio
    import functools

    # Only entries that are dicts with a string filename can be offered.
    entries = [
        item for item in registry
        if isinstance(item, dict) and isinstance(item.get("filename"), str)
    ]
    if not entries:
        return None

    known_names = {item["filename"] for item in entries}
    docs_list = "\n".join(
        f"- {item['filename']} ({item.get('description', '')})" for item in entries
    )

    try:
        request = functools.partial(
            client.chat.completions.create,
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": PROMPTS["router"].format(docs_list=docs_list)},
                {"role": "user", "content": user_query}
            ],
            response_format={"type": "json_object"},
            temperature=0.0
        )
        # The client call is synchronous; run it in a worker thread so the
        # event loop is not blocked while waiting for the network.
        response = await asyncio.get_running_loop().run_in_executor(None, request)
        result = json_repair.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"⚠️ Router Error: {e}")
        return None

    filename = result.get("filename") if isinstance(result, dict) else None
    # The answer comes from a model: accept it only if it names a template
    # that was actually offered.
    if isinstance(filename, str) and filename in known_names:
        return filename
    return None
90
+
91
+
92
async def extract_data_from_chat(user_query, filename):
    """Extract template field values from the user's request via the LLM.

    Args:
        user_query: The user's free-text request.
        filename: The selected template name. Currently unused: the schema
            sent to the model is built from every entry in ``clean_tags_db``.

    Returns:
        A dict of extracted values. An empty dict is returned when the call
        fails or the model's answer does not parse to a JSON object.
    """
    import asyncio
    import functools

    # Skip malformed tag entries instead of failing on a missing key.
    schema = "\n".join(
        f"- {spec['tag']}: {spec.get('description', '')}"
        for spec in clean_tags_db.values()
        if isinstance(spec, dict) and "tag" in spec
    )

    try:
        request = functools.partial(
            client.chat.completions.create,
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": PROMPTS["ner_extractor"].format(schema=schema)},
                {"role": "user", "content": user_query}
            ],
            response_format={"type": "json_object"},
            temperature=0.1
        )
        # Synchronous client call: keep it off the event loop thread.
        response = await asyncio.get_running_loop().run_in_executor(None, request)
        data = json_repair.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"⚠️ Extraction Error: {e}")
        return {}

    # The parser may hand back a list or string; callers expect a dict.
    return data if isinstance(data, dict) else {}
110
+
111
+
112
async def consult_logic(user_text):
    """Answer a question with the LLM, adding retrieved passages as context.

    When both ``collection`` and ``encoder`` are available, the question is
    embedded and the three nearest documents are joined into the system
    prompt. Retrieval problems are logged and the answer proceeds with a
    "nothing found" notice as context.

    Args:
        user_text: The user's question.

    Returns:
        ``{"type": "text", "content": <str>}``. On a failed LLM call the
        content is an error notice rather than an exception.
    """
    import asyncio
    import functools

    loop = asyncio.get_running_loop()
    context = "No specific articles found in the database."

    # Compare against None explicitly: these objects may define __len__, and
    # truthiness would then depend on their size rather than their presence.
    if collection is not None and encoder is not None:
        def _retrieve():
            vec = encoder.encode(user_text).tolist()
            return collection.query(query_embeddings=[vec], n_results=3)

        try:
            # Embedding and the vector query are blocking; run them in a
            # worker thread so other requests are not stalled.
            res = await loop.run_in_executor(None, _retrieve)
            if res['documents'] and res['documents'][0]:
                context = "\n---\n".join(res['documents'][0])
        except Exception as e:
            print(f"⚠️ Vector Search Error: {e}")

    try:
        request = functools.partial(
            client.chat.completions.create,
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": PROMPTS["consultant"].format(context=context)},
                {"role": "user", "content": f"User Question: {user_text}"}
            ],
            temperature=0.3
        )
        response = await loop.run_in_executor(None, request)
        # Callers concatenate onto the content, so never hand back None.
        return {"type": "text", "content": response.choices[0].message.content or ""}
    except Exception as e:
        return {"type": "text", "content": f"⚠️ Connection Error: {str(e)}"}
138
+
139
+
140
async def generate_doc_logic(user_text):
    """Handles the document generation pipeline (Currently in development).

    Steps: pick a template, extract field values from the request, render the
    template and save the result under ``DOWNLOADS_DIR``.

    Args:
        user_text: The user's free-text request.

    Returns:
        A dict with ``type`` and ``content``. On success ``type`` is
        ``"file"`` and ``file_url`` holds the saved path. Otherwise ``type``
        is ``"text"`` and ``content`` explains what happened (or contains a
        manually drafted answer when no template matched).
    """
    best_filename = await select_best_template(user_text)

    if not best_filename:
        fallback = await consult_logic(f"Draft a response for: {user_text}")
        fallback["content"] = (
            "⚠️ **No matching template found.** Here is a manual draft:\n\n"
            + (fallback.get("content") or "")
        )
        return fallback

    # The name originates from a model reply. Refuse anything that is not a
    # bare filename so it cannot point outside TEMPLATES_DIR / DOWNLOADS_DIR.
    is_bare_name = (
        isinstance(best_filename, str)
        and os.path.basename(best_filename) == best_filename
    )
    template_path = os.path.join(TEMPLATES_DIR, best_filename) if is_bare_name else None
    if template_path is None or not os.path.exists(template_path):
        return {"type": "text", "content": f"⚠️ Template file '{best_filename}' not found on server."}

    data = await extract_data_from_chat(user_text, best_filename)
    if not isinstance(data, dict):
        data = {}
    # Default the document date to today when the request did not give one.
    data.setdefault("doc_date", datetime.now().strftime("%d.%m.%Y"))

    try:
        doc = DocxTemplate(template_path)
        doc.render(data)
        os.makedirs(DOWNLOADS_DIR, exist_ok=True)

        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        out_name = f"LexGuard_{ts}_{best_filename}"
        out_path = os.path.join(DOWNLOADS_DIR, out_name)
        doc.save(out_path)

        return {
            "type": "file",
            "content": f"✅ Document successfully generated using template: **{best_filename}**",
            "file_url": out_path
        }
    except Exception as e:
        return {"type": "text", "content": f"⚠️ Generation error: {e}"}
app.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import asyncio
4
+
5
# Bind the real logic from api.py; if that import fails, install stand-in
# coroutines with the same names so the UI still starts and reports the error.
try:
    from api import consult_logic, generate_doc_logic

    print("✅ Logic successfully connected from api.py")
except ImportError as e:
    print(f"❌ IMPORT ERROR: {e}")

    # Python unbinds `e` as soon as the except clause finishes, so the
    # stand-ins must read the message from a name that outlives the clause.
    _import_error = str(e)


    async def consult_logic(msg):
        """Stand-in used when api.py is unavailable; reports the import error."""
        return {"content": f"Logic Error: {_import_error}"}


    async def generate_doc_logic(msg):
        """Stand-in used when api.py is unavailable; reports the import error."""
        return {"content": f"Logic Error: {_import_error}"}
19
+
20
+
21
async def main_interface(user_text):
    """Route a user message to the right handler and return its reply.

    Messages longer than 12 characters that contain a document keyword are
    treated as document-generation requests, which currently get a fixed
    "coming soon" notice. Everything else goes to ``consult_logic``.

    Args:
        user_text: The raw message typed by the user.

    Returns:
        A ``(file, text)`` tuple. ``file`` is always ``None`` at present;
        ``text`` is the reply, an empty string for empty input, or a
        "System Error" notice when the handler raises.
    """
    if not user_text:
        return None, ""

    doc_keywords = ["draft", "generate", "create", "contract", "agreement", "clause", "policy", "legal form"]
    lowered = user_text.lower()
    is_doc = len(user_text) > 12 and any(kw in lowered for kw in doc_keywords)

    try:
        if is_doc:
            # TODO: Document generation logic (Coming Soon)
            # The sample question must not contain any doc keyword, otherwise
            # following the advice lands the user on this same notice again.
            return None, "🛠️ **Document Generation feature is coming soon!**\n\nCurrently, I can only provide legal consultations regarding GDPR. Please try asking a question like: *'Explain Article 17 GDPR.'*"
        result = await consult_logic(user_text)
        return None, result.get("content", "")
    except Exception as e:
        return None, f"⚠️ System Error: {str(e)}"
36
+
37
+
38
async def respond(message, history):
    """Chat callback: append the user's message and the reply to the history.

    Args:
        message: Text from the input box.
        history: Current chat history as a list of ``{"role", "content"}``
            dicts, or ``None`` before the first message.

    Returns:
        ``("", history)``: the empty string clears the input box, and
        ``history`` is the updated conversation.
    """
    if history is None:
        history = []

    # Nothing was typed: leave the conversation untouched rather than adding
    # an empty user bubble followed by an empty assistant bubble.
    if not message or not message.strip():
        return "", history

    _, response_text = await main_interface(message)

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response_text})

    return "", history
47
+
48
# Stylesheet for the whole UI, wrapped in a <style> tag so it can be injected
# through an HTML component. The `.soon-btn` rule appears once, carrying the
# values that win the cascade (later declarations override earlier ones).
css_code = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&display=swap');
body {
    background-color: #000000 !important;
    margin: 0 !important;
    padding: 0 !important;
    overflow: hidden !important;
}
.gradio-container {
    background-color: #000000 !important;
    color: #FFFFFF !important;
    font-family: 'Inter', sans-serif !important;
    height: 100vh !important;
    max-height: 100vh !important;
    margin: 0 !important;
    padding: 0 !important;
    display: flex !important;
    flex-direction: column !important;
}
footer, .header-wrapper { display: none !important; }

#app-layout {
    height: 100vh !important; /* Используем vh, чтобы точно занять весь экран */
    width: 100% !important;
    max-width: 800px !important;
    margin: 0 auto !important;
    display: flex !important;
    flex-direction: column !important;
    justify-content: space-between !important;
    padding: 20px 20px 30px 20px !important;

    box-sizing: border-box !important;
    overflow: hidden !important;
}
.title-text {
    text-align: center;
    color: #FFFFFF !important;
    font-size: 18px;
    font-weight: 600;
    margin-bottom: 20px;
    flex-shrink: 0;
}
.subtitle-text {
    width: 100% !important;
    text-align: center !important;
    color: #666 !important;
    font-size: 14px;
    margin-bottom: 25px !important;
    display: block !important;
}
#suggestions-row {
    justify-content: center !important;
    gap: 10px !important;
    margin-bottom: 20px !important;
    background: transparent !important;
    border: none !important;
    flex-shrink: 0 !important;
}
.suggestion-btn {
    background-color: #111 !important;
    border: 1px solid #333 !important;
    border-radius: 10px !important;
    color: #AAA !important;
    font-size: 11px !important;
    padding: 8px 16px !important;
    width: auto !important;
    white-space: nowrap !important;
    display: inline-flex !important;
    align-items: center !important;
    justify-content: center !important;
}
.suggestion-btn:hover {
    background-color: #222 !important;
    border-color: #555 !important;
    color: #FFFFFF !important;
}
#gpt-chat {
    flex-grow: 1 !important;
    overflow-y: auto !important;
    background: transparent !important;
    border: none !important;
    margin-bottom: 10px !important;
    scrollbar-width: none;
}
#gpt-chat::-webkit-scrollbar { display: none; }
.gradio-chatbot { background: transparent !important; }
.bubble-wrap { background: transparent !important; border: none !important; }
.message { padding: 10px 0 !important; background: transparent !important; border: none !important; }
.message.user {
    background-color: #1a1a1a !important;
    border: 1px solid #333 !important;
    border-radius: 18px !important;
    color: #FFFFFF !important;
    padding: 10px 15px !important;
    max-width: 85% !important;
    margin-left: auto !important;
}
.message.bot {
    background-color: transparent !important;
    color: #E0E0E0 !important;
    padding-left: 0 !important;
}
.soon-btn {
    opacity: 0.5 !important;
    border-style: dashed !important;
    pointer-events: none !important;

    filter: grayscale(100%);
    cursor: not-allowed !important;
}
.soon-btn:hover {
    border-color: #333 !important;
    color: #AAA !important;
}
#input-container {
    flex-shrink: 0 !important;
    width: 100% !important;
}
#input-capsule {
    background-color: #000000 !important;
    border: 1px solid #333 !important;
    border-radius: 30px !important;
    padding: 4px 6px 4px 15px !important;
    display: flex !important;
    align-items: center !important;
    min-height: 50px !important;
}
#chat-input {
    border: none !important;
    background: transparent !important;
    padding: 0 !important;
    flex-grow: 1 !important;
}
#chat-input textarea {
    background-color: transparent !important;
    border: none !important;
    box-shadow: none !important;
    color: #FFFFFF !important;
    font-size: 15px !important;
    padding: 0 !important;
    height: 100% !important;
    min-height: 24px !important;
    resize: none !important;
}
#chat-input textarea:focus { box-shadow: none !important; border: none !important; }
#chat-input textarea::placeholder { color: rgba(255, 255, 255, 0.5) !important; opacity: 1 !important; }
#send-btn {
    background-color: #1f1f1f !important;
    color: #fff !important;

    width: 32px !important;
    height: 32px !important;
    min-width: 32px !important;
    max-width: 32px !important;
    min-height: 32px !important;
    max-height: 32px !important;

    border-radius: 50% !important;
    border: none !important;
    padding: 0 !important;
    margin: 0 0 0 8px !important;

    display: flex !important;
    justify-content: center !important;
    align-items: center !important;

    flex-shrink: 0 !important;
    box-shadow: none !important;
}
#send-btn:hover { background-color: #FFFFFF !important; color: #000000 !important; }
</style>
"""
226
# UI definition. NOTE(review): the nesting below is reconstructed from the
# statement order and the CSS element ids -- confirm it against the intended
# layout before relying on it.
with gr.Blocks(title="LexGuard EU") as demo:
    # Inject the stylesheet.
    gr.HTML(css_code)

    with gr.Column(elem_id="app-layout"):
        gr.HTML('<div class="subtitle-text">Next-Gen GDPR & EU Law Intelligence</div>')

        # Shortcut buttons; each one pre-fills the input box (wired up below).
        with gr.Row(elem_id="suggestions-row"):
            btn_doc = gr.Button("📄 Generate Document (Soon)", elem_classes=["suggestion-btn", "soon-btn"],
                                interactive=False)
            btn_law = gr.Button("⚖️ Legal Analysis", elem_classes="suggestion-btn")
            btn_cons = gr.Button("🎓 GDPR Consultation", elem_classes="suggestion-btn")
            btn_claim = gr.Button("📩 Complaints / Claims", elem_classes="suggestion-btn")
        examples_container = gr.Column()
        chatbot = gr.Chatbot(
            elem_id="gpt-chat",
            show_label=False,
            height=450,
        )

        # Input box plus send button. This is the only "chat-input" textbox:
        # a second, never-rendered one would only be shadowed by this
        # assignment and duplicate the element id.
        with gr.Row(elem_id="input-capsule"):
            msg = gr.Textbox(
                elem_id="chat-input",
                placeholder="Ask about GDPR compliance or legal procedures...",
                show_label=False,
                scale=10,
                container=False
            )
            submit = gr.Button("↑", elem_id="send-btn", scale=0)
        with gr.Row():
            with gr.Column(scale=1):
                gr.Examples(
                    examples=[
                        ["What are the transparency obligations for high-risk AI?"],
                        ["Explain Article 17 GDPR."],
                        ["Cyber vulnerability reporting deadlines?"]
                    ],
                    inputs=msg,
                    label=None,
                    elem_id="compact-examples"
                )

        gr.HTML("""
<div style="
font-size: 11px;
color: #888;
text-align: center;
margin-top: 5px;
opacity: 0.8;
">
<b>Disclaimer:</b> AI can make mistakes. Verify important information.<br>
Powered by <b>DeepSeek-V3.2</b>
</div>
""")

    # Each shortcut button writes a starter prompt into the input box.
    btn_doc.click(lambda: "Help me draft a Privacy Policy for a startup: ", None, msg)
    btn_law.click(lambda: "Analyze GDPR requirements for data processing: ", None, msg)
    btn_cons.click(lambda: "What are the DPO's main responsibilities according to GDPR? ", None, msg)
    btn_claim.click(lambda: "How to file a data breach notification to the authority? ", None, msg)

    # Enter key and send button both run the same chat callback.
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    submit.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
296
if __name__ == "__main__":
    # Make sure the downloads folder exists, then start the server listening
    # on all interfaces with errors surfaced in the UI.
    os.makedirs(os.path.abspath("downloads"), exist_ok=True)
    demo.launch(show_error=True, server_name="0.0.0.0")
legal_db/a7fa5423-401a-4ab5-a67d-02470bacc664/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7e2a5c66a30e0d9228b85d06681048e2d25425ad5b7f8f10b672c87ac37e001
3
+ size 321200
legal_db/a7fa5423-401a-4ab5-a67d-02470bacc664/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03cb3ac86f3e5bcb15e88b9bf99f760ec6b33e31d64a699e129b49868db6d733
3
+ size 100
legal_db/a7fa5423-401a-4ab5-a67d-02470bacc664/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:558f7539920ad7bcf3db87c3f13a1d88e4d0267b5a85030d4375e04515c5b80c
3
+ size 400
legal_db/a7fa5423-401a-4ab5-a67d-02470bacc664/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
3
+ size 0
legal_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a54ff3d573b45967efcbcf629c5b6aa8cddbdddf7cecef62b07dd6bff2187d10
3
+ size 8052736
requirements.txt ADDED
Binary file (300 Bytes). View file