Spaces:

Aya-Samir-Emam
/

Aya-AI

Sleeping

App Files Files Community

Aya-Samir-Emam committed on Mar 18

Commit

0016e0e

verified ·

1 Parent(s): 0aaa5da

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -84

app.py CHANGED Viewed

@@ -3,129 +3,115 @@ import torch
 from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
 import PyPDF2
 from docx import Document
-from fpdf import FPDF
-import base64
-# --- 1. Page Configuration ---
-st.set_page_config(page_title="LexGuard Ultimate | AI Legal Auditor", page_icon="⚖️", layout="wide")
-# --- 2. Load Engines (Legal & Sentiment) ---
 @st.cache_resource
 def load_engines():
     tokenizer = AutoTokenizer.from_pretrained("marshmellow77/roberta-base-cuad")
     model = AutoModelForQuestionAnswering.from_pretrained("marshmellow77/roberta-base-cuad")
-    risk_engine = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
-    return tokenizer, model, risk_engine
-tokenizer, model, risk_engine = load_engines()
-# --- 3. Advanced Extraction & Analysis Logic ---
-def get_legal_clause(query, context):
     inputs = tokenizer(query, context, truncation="only_second", max_length=512, stride=256,
-                          return_overflowing_tokens=True, padding="max_length", return_tensors="pt")
-    best_ans, max_score = "", -float('inf')
     for i in range(len(inputs["input_ids"])):
         chunk = {"input_ids": inputs["input_ids"][i:i+1], "attention_mask": inputs["attention_mask"][i:i+1]}
         with torch.no_grad():
             outputs = model(**chunk)
-        score = torch.max(outputs.start_logits) + torch.max(outputs.end_logits)
-        if score > max_score:
-            max_score = score
-            start, end = torch.argmax(outputs.start_logits), torch.argmax(outputs.end_logits) + 1
-            best_ans = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs["input_ids"][i][start:end]))
-    clean_ans = best_ans.replace("<s>", "").replace("</s>", "").strip()
-    return clean_ans if len(clean_ans) > 15 else None
-# --- 4. PDF Report Generator ---
-def create_pdf(results):
-    pdf = FPDF()
-    pdf.add_page()
-    pdf.set_font("Arial", 'B', 16)
-    pdf.cell(200, 10, txt="LexGuard AI - Contract Audit Report", ln=True, align='C')
-    pdf.set_font("Arial", size=10)
-    pdf.cell(200, 10, txt="Generated by Aya Samir | AI Legal Platform", ln=True, align='C')
-    pdf.ln(10)
-    for title, data in results.items():
-        pdf.set_font("Arial", 'B', 12)
-        pdf.cell(200, 10, txt=f"Clause: {title}", ln=True)
-        pdf.set_font("Arial", size=10)
-        pdf.multi_cell(0, 5, txt=f"Status: {data['status']}\nContent: {data['content']}\n", border=1)
-        pdf.ln(5)
-    return pdf.output(dest='S').encode('latin-1')
-# --- 5. Sidebar & File Handling ---
-with st.sidebar:
-    st.image("https://img.icons8.com/fluency/96/law.png", width=70)
-    st.title("LexGuard Control")
-    uploaded_file = st.file_uploader("Upload PDF or DOCX Contract", type=["pdf", "docx"])
-    st.markdown("---")
-    st.caption("Standard: UK/International Law Compliance")
-# --- 6. Main Dashboard UI ---
-st.title("⚖️ LexGuard Ultimate: Deep Contractual Audit")
-st.markdown("Automated Gap Analysis & Risk Assessment Engine")
 if uploaded_file:
-    # Processing Text
     if uploaded_file.type == "application/pdf":
         raw_text = "".join([p.extract_text() for p in PyPDF2.PdfReader(uploaded_file).pages])
     else:
         raw_text = "\n".join([p.text for p in Document(uploaded_file).paragraphs])
-    if st.button("🔍 Run Full Legal Audit"):
-        audit_results = {}
         audit_plan = {
-            "Governing Law": "What is the governing law and jurisdiction?",
-            "Termination Rights": "What are the notice periods and conditions for termination?",
-            "Limitation of Liability": "What is the maximum liability cap or exclusion of damages?",
-            "Indemnification": "What are the indemnification obligations and losses?",
-            "Confidentiality": "What are the non-disclosure obligations and their duration?",
-            "Force Majeure": "What events excuse performance under the contract?",
-            "Intellectual Property": "Who owns the intellectual property and work products?"
         }
-        st.subheader("🚩 Audit Dashboard")
         progress_bar = st.progress(0)
         for idx, (title, query) in enumerate(audit_plan.items()):
-            clause = get_legal_clause(query, raw_text)
-            status = "Standard/Detected"
-            with st.expander(f"📌 {title}", expanded=True):
                 if clause:
-                    sentiment = risk_engine(clause[:512])[0]
-                    # Risk Logic (Negative Sentiment or Specific Keywords)
-                    is_risky = sentiment['label'] == 'NEGATIVE' or any(word in clause.lower() for word in ["limit", "exclude", "waive"])
                     if is_risky:
-                        status = "🔴 HIGH RISK / AGGRESSIVE"
-                        st.error(status)
                     else:
-                        status = "🟢 LOW RISK / STANDARD"
-                        st.success(status)
-                    st.write("**Extracted Text:**")
                     st.code(clause, language="text")
                 else:
-                    status = "⚠️ CRITICAL OMISSION / NOT FOUND"
-                    st.warning(status)
-                    st.info(f"Legal Insight: Missing {title} clauses increase litigation risks under English Law.")
-                audit_results[title] = {"status": status, "content": clause if clause else "OMITTED"}
             progress_bar.progress((idx + 1) / len(audit_plan))
-        # PDF Download Option
-        pdf_data = create_pdf(audit_results)
-        b64 = base64.b64encode(pdf_data).decode()
-        href = f'<a href="data:application/octet-stream;base64,{b64}" download="LexGuard_Audit_Report.pdf" style="text-decoration:none;"><button style="width:100%; border-radius:5px; background-color:#4F46E5; color:white; padding:10px; border:none; cursor:pointer;">📥 Download Full Audit Report (PDF)</button></a>'
-        st.markdown(href, unsafe_allow_html=True)
 else:
-    st.info("👋 Welcome! Please upload a contract in the sidebar to start the deep analysis.")
-# --- 7. Footer ---
 st.markdown("---")
-st.caption("Developed by Aya Samir | AI, Data & Legal Consultant | Master's Researcher in Law")

 from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
 import PyPDF2
 from docx import Document
+# --- 1. إعدادات الصفحة ---
+st.set_page_config(page_title="LexGuard Precision", page_icon="⚖️", layout="wide")
+# --- 2. تحميل المحركات الذكية ---
 @st.cache_resource
 def load_engines():
+    # موديل استخراج النصوص القانونية (CUAD)
     tokenizer = AutoTokenizer.from_pretrained("marshmellow77/roberta-base-cuad")
     model = AutoModelForQuestionAnswering.from_pretrained("marshmellow77/roberta-base-cuad")
+    # موديل تحليل المخاطر
+    risk_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
+    return tokenizer, model, risk_analyzer
+tokenizer, model, risk_analyzer = load_engines()
+# --- 3. منطق الاستخراج الدقيق ومنع التكرار ---
+def get_precise_clause(query, context):
+    # استخدام Stride أكبر لضمان عدم ضياع السياق بين النوافذ
     inputs = tokenizer(query, context, truncation="only_second", max_length=512, stride=256,
+                       return_overflowing_tokens=True, padding="max_length", return_tensors="pt")
+    best_answer = ""
+    max_score = -float('inf')
+    threshold = 2.5  # عتبة الثقة لمنع الموديل من تخمين إجابات خاطئة
     for i in range(len(inputs["input_ids"])):
         chunk = {"input_ids": inputs["input_ids"][i:i+1], "attention_mask": inputs["attention_mask"][i:i+1]}
         with torch.no_grad():
             outputs = model(**chunk)
+        # حساب قوة الإجابة (Confidence Score)
+        start_logits = outputs.start_logits
+        end_logits = outputs.end_logits
+        current_score = torch.max(start_logits) + torch.max(end_logits)
+        if current_score > threshold and current_score > max_score:
+            max_score = current_score
+            start_idx = torch.argmax(start_logits)
+            end_idx = torch.argmax(end_logits) + 1
+            best_answer = tokenizer.convert_tokens_to_string(
+                tokenizer.convert_ids_to_tokens(inputs["input_ids"][i][start_idx:end_idx])
+            )
+    # تنظيف النص من الرموز التقنية
+    clean_ans = best_answer.replace("<s>", "").replace("</s>", "").strip()
+    # التأكد أن النص ليس قصيراً جداً أو مجرد كلمات متقاطعة
+    return clean_ans if len(clean_ans) > 25 else None
+# --- 4. واجهة المستخدم ---
+st.title("⚖️ LexGuard Pro: Comprehensive Legal Audit")
+st.markdown("### نظام التدقيق القانوني الآلي وتحليل الثغرات")
+with st.sidebar:
+    st.header("📥 إدخال العقد")
+    uploaded_file = st.file_uploader("ارفع ملف العقد (PDF/DOCX)", type=["pdf", "docx"])
+    st.info("النظام مهيأ لتحليل العقود وفق معايير القانون الإنجليزي والدولي.")
 if uploaded_file:
+    # قراءة النص من الملف
     if uploaded_file.type == "application/pdf":
         raw_text = "".join([p.extract_text() for p in PyPDF2.PdfReader(uploaded_file).pages])
     else:
         raw_text = "\n".join([p.text for p in Document(uploaded_file).paragraphs])
+    if st.button("🚀 ابدأ التدقيق الشامل الآن"):
+        st.divider()
+        # خطة التدقيق القانوني (أسئلة دقيقة جداً للموديل)
         audit_plan = {
+            "Governing Law": "Which state or country's law governs this agreement and where is the jurisdiction?",
+            "Termination for Convenience": "What is the notice period for termination without cause?",
+            "Termination for Cause": "Under what conditions can the contract be terminated for breach?",
+            "Limitation of Liability": "What is the monetary cap or limit on the provider's liability?",
+            "Confidentiality Obligations": "What are the restrictions on disclosing trade secrets and for how long?",
+            "Indemnification": "Which party is responsible for defending third-party legal claims?",
+            "Force Majeure": "What unexpected events excuse a party from performing their duties?",
+            "Intellectual Property": "Who owns the copyright and ownership of the software and work product?"
         }
         progress_bar = st.progress(0)
         for idx, (title, query) in enumerate(audit_plan.items()):
+            clause = get_precise_clause(query, raw_text)
+            with st.expander(f"📌 بند: {title}", expanded=True):
                 if clause:
+                    # تحليل المخاطر (Sentiment + Keyword Check)
+                    sentiment = risk_analyzer(clause[:512])[0]
+                    risk_keywords = ["limit", "exclude", "sole discretion", "waive", "immediate"]
+                    is_risky = sentiment['label'] == 'NEGATIVE' or any(word in clause.lower() for word in risk_keywords)
                     if is_risky:
+                        st.error("🔴 تنبيه مخاطر: صياغة هذا البند قد تكون تقييدية أو مجحفة.")
                     else:
+                        st.success("🟢 بند قياسي: النص يتبع القواعد العامة المتعارف عليها.")
+                    st.write("**النص المستخرج من العقد:**")
                     st.code(clause, language="text")
                 else:
+                    # تحليل الفجوات (Gap Analysis)
+                    st.warning(f"⚠️ بند مفقود: لم يتم العثور على نص صريح يتعلق بـ ({title}).")
+                    st.info(f"نصيحة قانونية: غياب هذا البند في العقود الدولية قد يؤدي إلى نزاعات قضائية معقدة.")
             progress_bar.progress((idx + 1) / len(audit_plan))
+        st.balloons()
 else:
+    st.info("يرجى رفع ملف العقد من القائمة الجانبية لبدء عملية الفحص الآلي.")
 st.markdown("---")
+st.caption("Aya Samir | AI & Legal Consultant | Master's Researcher in Law")