Spaces:

TeanShow
/

LexGuard-EU

Sleeping

App Files Files Community

TeanShow committed on Apr 14

Commit

8d17c17

verified ·

1 Parent(s): 2c137a9

Upload 8 files

Browse files

Files changed (9) hide show

.gitattributes +1 -0
api.py +172 -0
app.py +299 -0
legal_db/a7fa5423-401a-4ab5-a67d-02470bacc664/data_level0.bin +3 -0
legal_db/a7fa5423-401a-4ab5-a67d-02470bacc664/header.bin +3 -0
legal_db/a7fa5423-401a-4ab5-a67d-02470bacc664/length.bin +3 -0
legal_db/a7fa5423-401a-4ab5-a67d-02470bacc664/link_lists.bin +3 -0
legal_db/chroma.sqlite3 +3 -0
requirements.txt +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+legal_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text

api.py ADDED Viewed

	@@ -0,0 +1,172 @@

+import asyncio
+import json
+import os
+import zipfile
+from datetime import datetime
+
+import chromadb
+import json_repair
+from docxtpl import DocxTemplate
+from openai import OpenAI
+from sentence_transformers import SentenceTransformer
+# DeepSeek credentials and endpoint; the key is read from the environment.
+API_KEY = os.getenv("DEEPSEEK_API_KEY")
+BASE_URL = "https://api.deepseek.com"
+# Locations of the .docx templates, generated documents, config files and the Chroma store.
+TEMPLATES_DIR = "tagged_templates"
+DOWNLOADS_DIR = "downloads"
+REGISTRY_FILE = "templates_registry.json"
+TAGS_DB_FILE = "tags_db.json"
+DB_PATH = "./legal_db"
+# System prompts keyed by role. The {docs_list}, {schema} and {context} placeholders are
+# filled with str.format, which is why literal JSON braces in the router prompt are doubled.
+PROMPTS = {
+    "router": """
+You are a Legal Document Dispatcher. Your goal is to identify the most suitable document template from the list below based on the user's request.
+AVAILABLE TEMPLATES:
+{docs_list}
+INSTRUCTION:
+Return ONLY a JSON object: {{"filename": "exact_name.docx"}}
+If no suitable template is found, return: {{"filename": null}}
+""",
+    "ner_extractor": """
+You are a Legal Data Extraction specialist. Your task is to extract entity information from the user's query into a structured JSON format.
+DATE FORMAT: dd.mm.yyyy
+REQUIRED SCHEMA:
+{schema}
+""",
+    "consultant": """
+You are LexGuard AI, a professional legal assistant specializing in EU Law and GDPR.
+Provide accurate, structured, and formal legal advice based on the provided context.
+GUIDELINES:
+1. CITATIONS: Always mention specific GDPR Articles or Recitals if they are present in the context.
+2. LIMITATIONS: If the context doesn't contain the answer, use your general knowledge of EU Law but clearly state it is general information.
+3. STRUCTURE: Use Markdown (bolding, bullet points) for clarity.
+4. TONE: Professional, objective, and helpful.
+GDPR DATABASE CONTEXT:
+{context}
+"""
+}
+# OpenAI-compatible client pointed at the DeepSeek endpoint.
+client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
+# RAG components stay None when initialization below fails; consult_logic checks them before use.
+collection = None
+encoder = None
+# Best-effort start-up: a failure here is printed, not raised, so the module still imports.
+try:
+    encoder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
+    chroma_client = chromadb.PersistentClient(path=DB_PATH)
+    collection = chroma_client.get_collection(name="laws")
+    print("✅ ChromaDB and Encoder initialized")
+except Exception as e:
+    print(f"⚠️ RAG initialization error: {e}")
+# Load the template registry and the tag definitions; tag keys starting with "_" are dropped.
+try:
+    with open(REGISTRY_FILE, "r", encoding="utf-8") as f:
+        registry = json.load(f)
+    with open(TAGS_DB_FILE, "r", encoding="utf-8") as f:
+        tags_db = json.load(f)
+    clean_tags_db = {k: v for k, v in tags_db.items() if not k.startswith("_")}
+except Exception as e:
+    print(f"⚠️ Config files loading error: {e}")
+    # Either file failing resets both structures to empty defaults.
+    registry, clean_tags_db = [], {}
+async def select_best_template(user_query):
+    """Identify the best document template for a request using the LLM router.
+
+    Args:
+        user_query: Free-text request from the user.
+
+    Returns:
+        The ``filename`` of an entry in ``registry``, or ``None`` when the model
+        finds no match, answers with a name that is not in the registry, or the
+        call fails.
+    """
+    known_files = {item["filename"] for item in registry}
+    docs_list = "\n".join(f"- {item['filename']} ({item.get('description', '')})" for item in registry)
+    try:
+        # The OpenAI client is synchronous; run it in a worker thread so the
+        # event loop keeps serving other requests while we wait.
+        response = await asyncio.to_thread(
+            client.chat.completions.create,
+            model="deepseek-chat",
+            messages=[
+                {"role": "system", "content": PROMPTS["router"].format(docs_list=docs_list)},
+                {"role": "user", "content": user_query},
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.0,
+        )
+        result = json_repair.loads(response.choices[0].message.content)
+    except Exception as e:
+        print(f"⚠️ Router Error: {e}")
+        return None
+    # json_repair may hand back a list or a string when the model misbehaves.
+    filename = result.get("filename") if isinstance(result, dict) else None
+    if filename is None:
+        return None
+    # Only accept names we actually offered; the model can invent file names.
+    if not isinstance(filename, str) or filename not in known_files:
+        print(f"⚠️ Router Error: unknown template {filename!r}")
+        return None
+    return filename
+async def extract_data_from_chat(user_query, filename):
+    """Extract the template fields from the user's message via the LLM.
+
+    Args:
+        user_query: Free-text request from the user.
+        filename: Selected template name. Currently unused: the schema sent to
+            the model is built from every entry in ``clean_tags_db``.
+
+    Returns:
+        A dict of extracted values; an empty dict when the call fails or the
+        model does not return a JSON object.
+    """
+    schema = "\n".join(f"- {v['tag']}: {v['description']}" for v in clean_tags_db.values())
+    try:
+        # The OpenAI client is synchronous; keep it off the event loop thread.
+        response = await asyncio.to_thread(
+            client.chat.completions.create,
+            model="deepseek-chat",
+            messages=[
+                {"role": "system", "content": PROMPTS["ner_extractor"].format(schema=schema)},
+                {"role": "user", "content": user_query},
+            ],
+            response_format={"type": "json_object"},
+            temperature=0.1,
+        )
+        data = json_repair.loads(response.choices[0].message.content)
+    except Exception as e:
+        print(f"⚠️ Extraction Error: {e}")
+        return {}
+    # Callers treat the result as a mapping, so never leak a list or a string.
+    return data if isinstance(data, dict) else {}
+async def consult_logic(user_text):
+    """Answer a legal question, grounding the LLM with retrieved context when available.
+
+    Args:
+        user_text: The user's question.
+
+    Returns:
+        ``{"type": "text", "content": ...}`` holding either the model's answer
+        or a connection-error message.
+    """
+    context = "No specific articles found in the database."
+    # RAG: retrieve context from ChromaDB. Compare against None explicitly so
+    # the check does not depend on how these objects define truthiness.
+    if collection is not None and encoder is not None:
+        try:
+            # Encoding is CPU-bound; run it in a worker thread so the event loop stays responsive.
+            embedding = await asyncio.to_thread(encoder.encode, user_text)
+            res = collection.query(query_embeddings=[embedding.tolist()], n_results=3)
+            if res['documents'] and res['documents'][0]:
+                context = "\n---\n".join(res['documents'][0])
+        except Exception as e:
+            # Retrieval is best-effort: fall through with the default context.
+            print(f"⚠️ Vector Search Error: {e}")
+    try:
+        # The OpenAI client is synchronous; keep it off the event loop thread.
+        response = await asyncio.to_thread(
+            client.chat.completions.create,
+            model="deepseek-chat",
+            messages=[
+                {"role": "system", "content": PROMPTS["consultant"].format(context=context)},
+                {"role": "user", "content": f"User Question: {user_text}"},
+            ],
+            temperature=0.3,
+        )
+        return {"type": "text", "content": response.choices[0].message.content}
+    except Exception as e:
+        return {"type": "text", "content": f"⚠️ Connection Error: {str(e)}"}
+async def generate_doc_logic(user_text):
+    """Run the document generation pipeline (currently in development).
+
+    Selects a template, extracts field values from the request, renders the
+    .docx and saves it under ``DOWNLOADS_DIR``.
+
+    Args:
+        user_text: The user's request.
+
+    Returns:
+        ``{"type": "file", "content": ..., "file_url": ...}`` on success,
+        otherwise a ``{"type": "text", "content": ...}`` message (a manual
+        draft when no template matches, or an error description).
+    """
+    best_filename = await select_best_template(user_text)
+    if not best_filename:
+        fallback = await consult_logic(f"Draft a response for: {user_text}")
+        fallback["content"] = "⚠️ **No matching template found.** Here is a manual draft:\n\n" + fallback["content"]
+        return fallback
+    not_found = {"type": "text", "content": f"⚠️ Template file '{best_filename}' not found on server."}
+    # The name originates from an LLM answer: accept only a bare file name so it
+    # cannot point outside TEMPLATES_DIR (e.g. "../secrets.docx").
+    if not isinstance(best_filename, str) or os.path.basename(best_filename) != best_filename:
+        return not_found
+    template_path = os.path.join(TEMPLATES_DIR, best_filename)
+    # isfile (not exists) so that a directory such as ".." is rejected too.
+    if not os.path.isfile(template_path):
+        return not_found
+    data = await extract_data_from_chat(user_text, best_filename)
+    if not isinstance(data, dict):
+        data = {}
+    # Default the document date when the model omitted it or returned null/empty.
+    if not data.get("doc_date"):
+        data["doc_date"] = datetime.now().strftime("%d.%m.%Y")
+    try:
+        doc = DocxTemplate(template_path)
+        doc.render(data)
+        os.makedirs(DOWNLOADS_DIR, exist_ok=True)
+        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+        out_name = f"LexGuard_{ts}_{best_filename}"
+        out_path = os.path.join(DOWNLOADS_DIR, out_name)
+        doc.save(out_path)
+        return {
+            "type": "file",
+            "content": f"✅ Document successfully generated using template: **{best_filename}**",
+            "file_url": out_path
+        }
+    except Exception as e:
+        return {"type": "text", "content": f"⚠️ Generation error: {e}"}

app.py ADDED Viewed

	@@ -0,0 +1,299 @@

+import gradio as gr
+import os
+import asyncio
+# Import the real logic; if api.py cannot be imported, install stand-in
+# coroutines that report the import failure to the user instead of crashing.
+try:
+    from api import consult_logic, generate_doc_logic
+    print("✅ Logic successfully connected from api.py")
+except ImportError as e:
+    print(f"❌ IMPORT ERROR: {e}")
+    # Python unbinds `e` when the except clause ends, so the fallbacks must read
+    # the message from a name that outlives it (otherwise they raise NameError).
+    _import_error = str(e)
+    async def consult_logic(msg):
+        return {"type": "text", "content": f"Logic Error: {_import_error}"}
+    async def generate_doc_logic(msg):
+        return {"type": "text", "content": f"Logic Error: {_import_error}"}
+async def main_interface(user_text):
+    """Route a user message to document generation or to legal consultation.
+
+    Args:
+        user_text: Raw text typed by the user.
+
+    Returns:
+        A ``(file, text)`` tuple. ``file`` is always ``None`` for now; ``text``
+        is the reply to show (empty for empty input).
+    """
+    if not user_text: return None, ""
+    # Only an explicit action verb marks a generation request. Nouns such as
+    # "policy" or "contract" also occur in ordinary questions (including the
+    # example suggested in the message below), which must reach consultation.
+    doc_keywords = ["draft", "generate", "create"]
+    lowered = user_text.lower()
+    is_doc = any(kw in lowered for kw in doc_keywords) and len(user_text) > 12
+    try:
+        if is_doc:
+            # TODO: Document generation logic (Coming Soon)
+            return None, "🛠️ **Document Generation feature is coming soon!**\n\nCurrently, I can only provide legal consultations regarding GDPR. Please try asking a question like: *'What are the requirements for a Privacy Policy?'*"
+        else:
+            result = await consult_logic(user_text)
+            return None, result.get("content", "")
+    except Exception as e:
+        return None, f"⚠️ System Error: {str(e)}"
+async def respond(message, history):
+    """Chat callback: answer ``message`` and append the exchange to ``history``.
+
+    Args:
+        message: Text from the input box.
+        history: List of ``{"role", "content"}`` dicts, or ``None``.
+
+    Returns:
+        ``("", history)`` — an empty string to clear the input box and the
+        updated history.
+    """
+    if history is None: history = []
+    # Ignore empty or whitespace-only submits so no blank bubbles are added
+    # and no model call is spent on them.
+    if not message or not message.strip():
+        return "", history
+    _, response_text = await main_interface(message)
+    history.append({"role": "user", "content": message})
+    history.append({"role": "assistant", "content": response_text})
+    return "", history
+# Stylesheet for the chat layout, wrapped in a <style> tag because it is injected
+# through gr.HTML. NOTE(review): `.soon-btn` is declared twice; the later rule
+# overrides `opacity` and `cursor` from the earlier one.
+css_code = """
+<style>
+@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&display=swap');
+body {
+    background-color: #000000 !important;
+    margin: 0 !important;
+    padding: 0 !important;
+    overflow: hidden !important;
+}
+.gradio-container {
+    background-color: #000000 !important;
+    color: #FFFFFF !important;
+    font-family: 'Inter', sans-serif !important;
+    height: 100vh !important;
+    max-height: 100vh !important;
+    margin: 0 !important;
+    padding: 0 !important;
+    display: flex !important;
+    flex-direction: column !important;
+}
+footer, .header-wrapper { display: none !important; }
+#app-layout {
+    height: 100vh !important; /* Используем vh, чтобы точно занять весь экран */
+    width: 100% !important;
+    max-width: 800px !important;
+    margin: 0 auto !important;
+    display: flex !important;
+    flex-direction: column !important;
+    justify-content: space-between !important;
+    padding: 20px 20px 30px 20px !important;
+    box-sizing: border-box !important;
+    overflow: hidden !important;
+}
+.title-text {
+    text-align: center;
+    color: #FFFFFF !important;
+    font-size: 18px;
+    font-weight: 600;
+    margin-bottom: 20px;
+    flex-shrink: 0;
+}
+.subtitle-text {
+    width: 100% !important;
+    text-align: center !important;
+    color: #666 !important;
+    font-size: 14px;
+    margin-bottom: 25px !important;
+    display: block !important;
+}
+#suggestions-row {
+    justify-content: center !important;
+    gap: 10px !important;
+    margin-bottom: 20px !important;
+    background: transparent !important;
+    border: none !important;
+    flex-shrink: 0 !important;
+}
+.suggestion-btn {
+    background-color: #111 !important;
+    border: 1px solid #333 !important;
+    border-radius: 10px !important;
+    color: #AAA !important;
+    font-size: 11px !important;
+    padding: 8px 16px !important;
+    width: auto !important;
+    white-space: nowrap !important;
+    display: inline-flex !important;
+    align-items: center !important;
+    justify-content: center !important;
+}
+.soon-btn {
+    opacity: 0.4 !important;
+    border-style: dashed !important;
+    pointer-events: none !important;
+    filter: grayscale(100%);
+    cursor: default !important;
+}
+.suggestion-btn:hover {
+    background-color: #222 !important;
+    border-color: #555 !important;
+    color: #FFFFFF !important;
+}
+#gpt-chat {
+    flex-grow: 1 !important;
+    overflow-y: auto !important;
+    background: transparent !important;
+    border: none !important;
+    margin-bottom: 10px !important;
+    scrollbar-width: none;
+}
+#gpt-chat::-webkit-scrollbar { display: none; }
+.gradio-chatbot { background: transparent !important; }
+.bubble-wrap { background: transparent !important; border: none !important; }
+.message { padding: 10px 0 !important; background: transparent !important; border: none !important; }
+.message.user {
+    background-color: #1a1a1a !important;
+    border: 1px solid #333 !important;
+    border-radius: 18px !important;
+    color: #FFFFFF !important;
+    padding: 10px 15px !important;
+    max-width: 85% !important;
+    margin-left: auto !important;
+}
+.message.bot {
+    background-color: transparent !important;
+    color: #E0E0E0 !important;
+    padding-left: 0 !important;
+}
+.soon-btn {
+    opacity: 0.5 !important;
+    cursor: not-allowed !important;
+    border-style: dashed !important;
+}
+.soon-btn:hover {
+    border-color: #333 !important;
+    color: #AAA !important;
+}
+#input-container {
+    flex-shrink: 0 !important;
+    width: 100% !important;
+}
+#input-capsule {
+    background-color: #000000 !important;
+    border: 1px solid #333 !important;
+    border-radius: 30px !important;
+    padding: 4px 6px 4px 15px !important;
+    display: flex !important;
+    align-items: center !important;
+    min-height: 50px !important;
+}
+#chat-input {
+    border: none !important;
+    background: transparent !important;
+    padding: 0 !important;
+    flex-grow: 1 !important;
+}
+#chat-input textarea {
+    background-color: transparent !important;
+    border: none !important;
+    box-shadow: none !important;
+    color: #FFFFFF !important;
+    font-size: 15px !important;
+    padding: 0 !important;
+    height: 100% !important;
+    min-height: 24px !important;
+    resize: none !important;
+}
+#chat-input textarea:focus { box-shadow: none !important; border: none !important; }
+#chat-input textarea::placeholder { color: rgba(255, 255, 255, 0.5) !important; opacity: 1 !important; }
+#send-btn {
+    background-color: #1f1f1f !important;
+    color: #fff !important;
+    width: 32px !important;
+    height: 32px !important;
+    min-width: 32px !important;
+    max-width: 32px !important;
+    min-height: 32px !important;
+    max-height: 32px !important;
+    border-radius: 50% !important;
+    border: none !important;
+    padding: 0 !important;
+    margin: 0 0 0 8px !important;
+    display: flex !important;
+    justify-content: center !important;
+    align-items: center !important;
+    flex-shrink: 0 !important;
+    box-shadow: none !important;
+}
+#send-btn:hover { background-color: #FFFFFF !important; color: #000000 !important; }
+</style>
+"""
+# UI layout: subtitle, suggestion buttons, chat history, input capsule, examples
+# and disclaimer. Event wiring follows the layout, still inside the Blocks context.
+with gr.Blocks(title="LexGuard EU") as demo:
+    gr.HTML(css_code)
+    with gr.Column(elem_id="app-layout"):
+        gr.HTML('<div class="subtitle-text">Next-Gen GDPR & EU Law Intelligence</div>')
+        # A second, never-rendered Textbox used to be created here and bound to
+        # `msg`; it was immediately shadowed by the real input below, so it is gone.
+        with gr.Row(elem_id="suggestions-row"):
+            btn_doc = gr.Button("📄 Generate Document (Soon)", elem_classes=["suggestion-btn", "soon-btn"],
+                                interactive=False)
+            btn_law = gr.Button("⚖️ Legal Analysis", elem_classes="suggestion-btn")
+            btn_cons = gr.Button("🎓 GDPR Consultation", elem_classes="suggestion-btn")
+            btn_claim = gr.Button("📩 Complaints / Claims", elem_classes="suggestion-btn")
+        # NOTE(review): this column is never referenced again — confirm it is still needed.
+        examples_container = gr.Column()
+        chatbot = gr.Chatbot(
+            elem_id="gpt-chat",
+            show_label=False,
+            height=450,
+        )
+        with gr.Row(elem_id="input-capsule"):
+            msg = gr.Textbox(
+                elem_id="chat-input",
+                placeholder="Ask about GDPR compliance or legal procedures...",
+                show_label=False,
+                scale=10,
+                container=False
+            )
+            submit = gr.Button("↑", elem_id="send-btn", scale=0)
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Examples(
+                    examples=[
+                        ["What are the transparency obligations for high-risk AI?"],
+                        ["Explain Article 17 GDPR."],
+                        ["Cyber vulnerability reporting deadlines?"]
+                    ],
+                    inputs=msg,
+                    label=None,
+                    elem_id="compact-examples"
+                )
+        gr.HTML("""
+            <div style="
+                font-size: 11px;
+                color: #888;
+                text-align: center;
+                margin-top: 5px;
+                opacity: 0.8;
+            ">
+                <b>Disclaimer:</b> AI can make mistakes. Verify important information.<br>
+                Powered by <b>DeepSeek-V3.2</b>
+            </div>
+        """)
+    # Suggestion buttons only pre-fill the input box; the user still has to submit.
+    btn_doc.click(lambda: "Help me draft a Privacy Policy for a startup: ", None, msg)
+    btn_law.click(lambda: "Analyze GDPR requirements for data processing: ", None, msg)
+    btn_cons.click(lambda: "What are the DPO's main responsibilities according to GDPR? ", None, msg)
+    btn_claim.click(lambda: "How to file a data breach notification to the authority? ", None, msg)
+    # Enter in the textbox and the send button share the same handler.
+    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
+    submit.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
+# Script entry point: make sure the downloads directory exists, then start the server.
+if __name__ == "__main__":
+    abs_downloads_path = os.path.abspath("downloads")
+    os.makedirs(abs_downloads_path, exist_ok=True)
+    # Bind to all interfaces; show_error=True is passed so errors are shown in the UI.
+    demo.launch(server_name="0.0.0.0", show_error=True)

legal_db/a7fa5423-401a-4ab5-a67d-02470bacc664/data_level0.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7e2a5c66a30e0d9228b85d06681048e2d25425ad5b7f8f10b672c87ac37e001
+size 321200

legal_db/a7fa5423-401a-4ab5-a67d-02470bacc664/header.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03cb3ac86f3e5bcb15e88b9bf99f760ec6b33e31d64a699e129b49868db6d733
+size 100

legal_db/a7fa5423-401a-4ab5-a67d-02470bacc664/length.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:558f7539920ad7bcf3db87c3f13a1d88e4d0267b5a85030d4375e04515c5b80c
+size 400

legal_db/a7fa5423-401a-4ab5-a67d-02470bacc664/link_lists.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+size 0

legal_db/chroma.sqlite3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a54ff3d573b45967efcbcf629c5b6aa8cddbdddf7cecef62b07dd6bff2187d10
+size 8052736

requirements.txt ADDED Viewed

Binary file (300 Bytes). View file