Spaces:

Aya-Samir-Emam
/

Aya-AI

Sleeping

App Files Files Community

Aya-AI / app.py

Aya-Samir-Emam

Update app.py

0016e0e verified about 1 month ago

raw

history blame contribute delete

6.32 kB

	import streamlit as st
	import torch
	from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
	import PyPDF2
	from docx import Document

	# --- 1. Page configuration ---
	# Sets the page title, the page icon and the "wide" layout for the app.
	st.set_page_config(page_title="LexGuard Precision", page_icon="⚖️", layout="wide")

	# --- 2. Load the model engines ---
	@st.cache_resource
	def load_engines():
	    """Instantiate the tokenizer, the QA model and the risk classifier.

	    The function is wrapped in ``st.cache_resource`` so the heavy objects
	    are handed to Streamlit's resource cache instead of being rebuilt by
	    every call.

	    Returns:
	        tuple: ``(tokenizer, model, risk_analyzer)`` where the first two
	        come from the CUAD question-answering checkpoint and the third is
	        a ``sentiment-analysis`` pipeline.
	    """
	    cuad_checkpoint = "marshmellow77/roberta-base-cuad"
	    sentiment_checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

	    # Legal clause extraction model (CUAD).
	    qa_tokenizer = AutoTokenizer.from_pretrained(cuad_checkpoint)
	    qa_model = AutoModelForQuestionAnswering.from_pretrained(cuad_checkpoint)

	    # Risk-analysis model.
	    sentiment_pipeline = pipeline("sentiment-analysis", model=sentiment_checkpoint)

	    return qa_tokenizer, qa_model, sentiment_pipeline

	# Module-level handles: `tokenizer` and `model` are read by get_precise_clause,
	# `risk_analyzer` by the audit loop further down.
	tokenizer, model, risk_analyzer = load_engines()

	# --- 3. Precise clause extraction ---
	def _best_span(start_logits, end_logits, context_mask):
	    """Pick the highest-scoring answer span that lies inside the context.

	    Args:
	        start_logits: 1-D tensor of start scores for one window.
	        end_logits: 1-D tensor of end scores for the same window.
	        context_mask: 1-D bool tensor, True where the token belongs to the
	            contract text (not the question, special or padding tokens).

	    Returns:
	        tuple: ``(start, end, score)`` with ``start <= end`` (inclusive
	        token indices) and ``score = start_logits[start] + end_logits[end]``.
	        ``score`` is ``-inf`` when the window holds no context token.
	    """
	    neg_inf = float("-inf")
	    context_mask = context_mask.to(start_logits.device)
	    starts = start_logits.masked_fill(~context_mask, neg_inf)
	    ends = end_logits.masked_fill(~context_mask, neg_inf)

	    # Score every (start, end) pair, then rule out pairs whose end precedes
	    # the start so the selected span is never empty or inverted.
	    pair_scores = starts[:, None] + ends[None, :]
	    positions = torch.arange(pair_scores.size(0), device=pair_scores.device)
	    ordered = positions[:, None] <= positions[None, :]
	    pair_scores = pair_scores.masked_fill(~ordered, neg_inf)

	    flat_index = int(torch.argmax(pair_scores))
	    start, end = divmod(flat_index, pair_scores.size(1))
	    return start, end, pair_scores[start, end].item()


	def get_precise_clause(query, context, threshold=2.5, min_chars=25):
	    """Extract the contract passage that best answers ``query``.

	    The contract is split into overlapping 512-token windows (stride 256);
	    each window is scored by the QA model and the best valid span across
	    all windows is returned.

	    Unlike an independent arg-max over start and end logits, the span is
	    constrained to contract tokens with ``start <= end``, and a window only
	    competes when its span beats the window's own "no answer" score. This
	    prevents a window with an unusable prediction from hiding a good answer
	    found in another window.

	    Args:
	        query: Natural-language question describing the clause.
	        context: Full contract text.
	        threshold: Minimum ``start + end`` logit sum for a span to be
	            accepted (guards against low-confidence guesses).
	        min_chars: Answers with this many characters or fewer are treated
	            as noise and discarded.

	    Returns:
	        The extracted clause text, or ``None`` when nothing confident and
	        long enough was found (including when ``context`` is empty).
	    """
	    if not context or not context.strip():
	        return None

	    # A larger stride keeps context from being lost between windows.
	    inputs = tokenizer(
	        query,
	        context,
	        truncation="only_second",
	        max_length=512,
	        stride=256,
	        return_overflowing_tokens=True,
	        padding="max_length",
	        return_tensors="pt",
	    )

	    best_answer = ""
	    best_score = float("-inf")

	    windows = zip(inputs["input_ids"], inputs["attention_mask"])
	    for i, (input_ids, attention_mask) in enumerate(windows):
	        with torch.no_grad():
	            outputs = model(
	                input_ids=input_ids.unsqueeze(0),
	                attention_mask=attention_mask.unsqueeze(0),
	            )
	        start_logits = outputs.start_logits[0]
	        end_logits = outputs.end_logits[0]

	        # sequence_ids() is 1 for tokens of the second sequence (the contract)
	        # and 0/None for question, special and padding tokens.
	        context_mask = torch.tensor(
	            [seq_id == 1 for seq_id in inputs.sequence_ids(i)],
	            dtype=torch.bool,
	        )
	        start, end, score = _best_span(start_logits, end_logits, context_mask)

	        # NOTE(review): position 0 is taken to be the <s>/CLS token, which
	        # SQuAD2-style QA heads use to signal "no answer" - confirm for
	        # this checkpoint.
	        null_score = (start_logits[0] + end_logits[0]).item()
	        if score <= null_score:
	            continue

	        if score > threshold and score > best_score:
	            best_score = score
	            span_ids = input_ids[start:end + 1].tolist()
	            best_answer = tokenizer.convert_tokens_to_string(
	                tokenizer.convert_ids_to_tokens(span_ids)
	            )

	    # The span holds contract tokens only, so no special-token clean-up is
	    # needed beyond trimming whitespace.
	    clean_answer = best_answer.strip()
	    # Reject answers that are too short to be a real clause.
	    return clean_answer if len(clean_answer) > min_chars else None

	# --- 4. User interface ---
	st.title("⚖️ LexGuard Pro: Comprehensive Legal Audit")
	st.markdown("### نظام التدقيق القانوني الآلي وتحليل الثغرات")

	# Sidebar widgets: heading, contract uploader (PDF/DOCX only) and an info note.
	# `uploaded_file` is read by the audit section that follows.
	sidebar = st.sidebar
	sidebar.header("📥 إدخال العقد")
	uploaded_file = sidebar.file_uploader(
	    "ارفع ملف العقد (PDF/DOCX)",
	    type=["pdf", "docx"],
	)
	sidebar.info("النظام مهيأ لتحليل العقود وفق معايير القانون الإنجليزي والدولي.")

	def _read_contract_text(upload):
	    """Return the plain text of an uploaded PDF or DOCX contract.

	    PDF pages are joined with a newline so the last word of one page is
	    not fused with the first word of the next; a page that yields no text
	    contributes an empty string.
	    """
	    if upload.type == "application/pdf":
	        pages = PyPDF2.PdfReader(upload).pages
	        return "\n".join(page.extract_text() or "" for page in pages)
	    return "\n".join(paragraph.text for paragraph in Document(upload).paragraphs)


	if uploaded_file:
	    # Read the text out of the uploaded file.
	    raw_text = _read_contract_text(uploaded_file)

	    if not raw_text.strip():
	        # Without text (e.g. an image-only scan) every clause would be
	        # reported as "missing", which is misleading - stop here instead.
	        st.error("⚠️ تعذر استخراج نص من الملف. قد يكون المستند ممسوحاً ضوئياً (صور فقط) أو فارغاً.")
	    elif st.button("🚀 ابدأ التدقيق الشامل الآن"):
	        st.divider()

	        # Legal audit plan: clause title -> question put to the QA model.
	        audit_plan = {
	            "Governing Law": "Which state or country's law governs this agreement and where is the jurisdiction?",
	            "Termination for Convenience": "What is the notice period for termination without cause?",
	            "Termination for Cause": "Under what conditions can the contract be terminated for breach?",
	            "Limitation of Liability": "What is the monetary cap or limit on the provider's liability?",
	            "Confidentiality Obligations": "What are the restrictions on disclosing trade secrets and for how long?",
	            "Indemnification": "Which party is responsible for defending third-party legal claims?",
	            "Force Majeure": "What unexpected events excuse a party from performing their duties?",
	            "Intellectual Property": "Who owns the copyright and ownership of the software and work product?",
	        }
	        # Substrings whose presence flags a clause as risky (loop-invariant).
	        risk_keywords = ("limit", "exclude", "sole discretion", "waive", "immediate")

	        progress_bar = st.progress(0)

	        for idx, (title, query) in enumerate(audit_plan.items(), start=1):
	            clause = get_precise_clause(query, raw_text)

	            with st.expander(f"📌 بند: {title}", expanded=True):
	                if clause:
	                    # Risk analysis: sentiment label plus keyword check.
	                    # Only the first 512 characters are sent to the classifier.
	                    sentiment = risk_analyzer(clause[:512])[0]
	                    clause_lower = clause.lower()
	                    is_risky = sentiment['label'] == 'NEGATIVE' or any(
	                        word in clause_lower for word in risk_keywords
	                    )

	                    if is_risky:
	                        st.error("🔴 تنبيه مخاطر: صياغة هذا البند قد تكون تقييدية أو مجحفة.")
	                    else:
	                        st.success("🟢 بند قياسي: النص يتبع القواعد العامة المتعارف عليها.")

	                    st.write("النص المستخرج من العقد:")
	                    st.code(clause, language="text")
	                else:
	                    # Gap analysis: no explicit text was found for this clause.
	                    st.warning(f"⚠️ بند مفقود: لم يتم العثور على نص صريح يتعلق بـ ({title}).")
	                    st.info("نصيحة قانونية: غياب هذا البند في العقود الدولية قد يؤدي إلى نزاعات قضائية معقدة.")

	            progress_bar.progress(idx / len(audit_plan))

	        st.balloons()
	else:
	    st.info("يرجى رفع ملف العقد من القائمة الجانبية لبدء عملية الفحص الآلي.")

	# Footer: horizontal rule followed by the author caption.
	st.markdown("---")
	# Raw string: "\|" is not a valid Python escape sequence and triggers an
	# invalid-escape warning in a normal literal; the raw form yields the same
	# characters (backslash + pipe) without the warning.
	st.caption(r"Aya Samir \| AI & Legal Consultant \| Master's Researcher in Law")