Nadasr committed on
Commit
7fae465
·
verified ·
1 Parent(s): ba11860

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +73 -0
  2. crew.py +127 -0
  3. evaluation.ipynb +180 -0
  4. requirements.txt +25 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Policy Summarizer - Gradio App
3
+ """
4
+ import gradio as gr
5
+ from dotenv import load_dotenv
6
+
7
+ from crew import run_policy_analysis
8
+ from utils.validators import validate_url, is_likely_policy_url
9
+ from utils.logger import format_logs_for_display, clear_logs
10
+
11
+ load_dotenv()
12
+
13
def process_policy(url: str):
    """Validate *url*, run the CrewAI policy analysis, and return the pair
    (summary_markdown, logs_markdown) consumed by the Gradio outputs."""
    clear_logs()

    # Reject malformed or unsafe URLs before doing any network work.
    ok, error_msg = validate_url(url)
    if not ok:
        return f"❌ **Invalid URL:** {error_msg}", "Validation failed"

    # Heuristic hint shown above the summary when the URL does not look
    # like a policy page; analysis still proceeds.
    warning = "" if is_likely_policy_url(url) else "⚠️ This URL may not be a policy page.\n\n"

    try:
        summary = run_policy_analysis(url)
    except Exception as exc:
        # Surface the failure in the summary tab; keep whatever logs exist.
        return f"❌ **Error:** {str(exc)}", format_logs_for_display()
    return warning + summary, format_logs_for_display()
34
+
35
+
36
# ---------------------------------------------------------------------------
# Gradio UI: URL input plus analyze button, feeding Summary / Logs tabs.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Policy Summarizer") as app:

    gr.Markdown("# 🔍 Policy Summarizer")
    gr.Markdown("""
    Paste a link to any Privacy Policy or Terms of Service, and AI agents will:
    - 📄 **Summarize** the key points
    - ✅ **Highlight** your rights
    - ⚠️ **Warn** about concerns
    """)

    # Wide textbox next to a narrow primary button.
    with gr.Row():
        policy_url_box = gr.Textbox(
            label="Policy URL",
            placeholder="https://example.com/privacy-policy",
            scale=4,
        )
        run_button = gr.Button("🔍 Analyze", variant="primary", scale=1)

    gr.Markdown("### Examples:")
    for example_line in (
        "- https://discord.com/privacy",
        "- https://www.spotify.com/legal/privacy-policy/",
    ):
        gr.Markdown(example_line)

    # Results: rendered summary in one tab, agent activity logs in the other.
    with gr.Tabs():
        with gr.TabItem("📋 Summary"):
            summary_view = gr.Markdown(value="*Enter a URL and click Analyze*")
        with gr.TabItem("📊 Logs"):
            logs_view = gr.Markdown(value="*Logs appear here*")

    run_button.click(
        fn=process_policy,
        inputs=[policy_url_box],
        outputs=[summary_view, logs_view],
    )

if __name__ == "__main__":
    # Bind 0.0.0.0 so the app is reachable when run inside a container/Space.
    app.launch(server_name="0.0.0.0", server_port=7860)
73
+
crew.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CrewAI Configuration - Policy Summarizer
3
+ """
4
+ import os
5
+ from crewai import Agent, Task, Crew, Process
6
+ from tools.web_scraper import web_scraper_tool
7
+ from tools.text_analyzer import text_analyzer_tool
8
+ from utils.logger import log_agent_action, clear_logs
9
+
10
+
11
def create_agents():
    """Instantiate the three CrewAI agents used by the pipeline.

    Returns:
        tuple: ``(orchestrator, scraper, analyzer)`` in that order, matching
        what ``run_policy_analysis`` unpacks.
    """
    # Delegating coordinator that assembles the final user-facing summary.
    coordinator = Agent(
        role="Policy Analysis Orchestrator",
        goal="Coordinate the policy analysis and create a user-friendly summary",
        backstory="""You are an expert at analyzing legal documents and presenting
        complex information in simple terms. You coordinate the analysis workflow.""",
        verbose=True,
        allow_delegation=True,
    )

    # Fetches and cleans the raw policy text; no delegation, single tool.
    fetcher = Agent(
        role="Web Content Scraper",
        goal="Extract clean policy text from web URLs",
        backstory="""You specialize in web scraping and content extraction.
        You can extract policy text while filtering out irrelevant content.""",
        verbose=True,
        allow_delegation=False,
        tools=[web_scraper_tool],
    )

    # Legal-analysis specialist; no delegation, single tool.
    reviewer = Agent(
        role="Policy Analyzer",
        goal="Analyze policies to identify key points, rights, and concerns",
        backstory="""You are a legal expert who analyzes terms of service and
        privacy policies. You identify user rights and potential red flags.""",
        verbose=True,
        allow_delegation=False,
        tools=[text_analyzer_tool],
    )

    return coordinator, fetcher, reviewer
44
+
45
+
46
def create_tasks(orchestrator, scraper, analyzer, url: str):
    """Wire up the three tasks of the pipeline (scrape -> analyze -> summarize).

    Args:
        orchestrator: agent that writes the final summary.
        scraper: agent that fetches the policy text.
        analyzer: agent that extracts rights and concerns.
        url: policy page to analyze (interpolated into the scrape prompt).

    Returns:
        list: tasks in execution order, for a sequential Crew.
    """
    fetch = Task(
        description=f"""
        Scrape the policy content from: {url}
        Use the web_scraper_tool to fetch and extract the text.
        Return the full policy text content.
        """,
        expected_output="The extracted policy text content",
        agent=scraper,
    )

    review = Task(
        description="""
        Analyze the scraped policy content:
        1. Use text_analyzer_tool to identify key sections
        2. Find user rights (deletion, access, opt-out, etc.)
        3. Identify concerns and red flags
        4. Note data collection and sharing practices
        """,
        expected_output="Structured analysis with sections, rights, and concerns",
        agent=analyzer,
        context=[fetch],  # consumes the scraped text
    )

    digest = Task(
        description="""
        Create a user-friendly summary with these sections:

        ## 📄 Policy Summary
        [3-5 key points about this policy]

        ## ✅ Your Rights
        [List user rights with brief explanations]

        ## ⚠️ Concerns & Warnings
        [List red flags with severity: 🔴 High, 🟡 Medium, 🟢 Low]

        ## 💡 Recommendation
        [Overall assessment and advice]

        Use simple language, avoid legal jargon.
        """,
        expected_output="A formatted, user-friendly policy summary",
        agent=orchestrator,
        context=[fetch, review],  # sees both raw text and analysis
    )

    return [fetch, review, digest]
96
+
97
+
98
def run_policy_analysis(url: str) -> str:
    """Run the full scrape -> analyze -> summarize pipeline for *url*.

    Returns the final markdown summary on success, or a string prefixed
    with "❌ Error:" on any failure (the caller renders it directly in
    the UI rather than catching exceptions).
    """
    clear_logs()

    # Record the kickoff; note only the URL *length* is logged, not the URL.
    log_agent_action(
        agent_name="System",
        action="Starting Analysis",
        input_summary=f"URL length: {len(url)}",
        output_summary="Initializing agents...",
        duration_seconds=0,
        success=True,
    )

    try:
        coordinator, fetcher, reviewer = create_agents()
        pipeline = Crew(
            agents=[coordinator, fetcher, reviewer],
            tasks=create_tasks(coordinator, fetcher, reviewer, url),
            process=Process.sequential,  # tasks run strictly in order
            verbose=True,
        )
        return str(pipeline.kickoff())
    except Exception as e:
        # UI boundary: report the failure as displayable text.
        return f"❌ Error: {str(e)}"
evaluation.ipynb ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from datetime import datetime\n",
10
+ "from utils.validators import validate_url, is_likely_policy_url\n",
11
+ "\n"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 2,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "TEST_URLS = [\n",
21
+ " {\"company\": \"Discord\", \"url\": \"https://discord.com/privacy\"},\n",
22
+ " {\"company\": \"Spotify\", \"url\": \"https://www.spotify.com/legal/privacy-policy/\"},\n",
23
+ " {\"company\": \"Reddit\", \"url\": \"https://www.reddit.com/policies/privacy-policy\"},\n",
24
+ " {\"company\": \"Netflix\", \"url\": \"https://www.netflix.com/privacy\"},\n",
25
+ " {\"company\": \"Twitter\", \"url\": \"https://twitter.com/en/tos\"},\n",
26
+ " {\"company\": \"TikTok\", \"url\": \"https://www.tiktok.com/legal/privacy-policy\"},\n",
27
+ " {\"company\": \"LinkedIn\", \"url\": \"https://www.linkedin.com/legal/privacy-policy\"},\n",
28
+ " {\"company\": \"Google\", \"url\": \"https://policies.google.com/privacy\"},\n",
29
+ " {\"company\": \"Apple\", \"url\": \"https://www.apple.com/legal/privacy/\"},\n",
30
+ " {\"company\": \"Amazon\", \"url\": \"https://www.amazon.com/gp/help/customer/display.html\"}\n",
31
+ "]\n",
32
+ "\n",
33
+ "BAD_URLS = [\n",
34
+ " {\"name\": \"Empty\", \"url\": \"\"},\n",
35
+ " {\"name\": \"No protocol\", \"url\": \"google.com\"},\n",
36
+ " {\"name\": \"Localhost\", \"url\": \"http://localhost/test\"},\n",
37
+ " {\"name\": \"Private IP\", \"url\": \"http://192.168.1.1/page\"}\n",
38
+ "]"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 3,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "data": {
48
+ "text/plain": [
49
+ "{'Discord': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
50
+ " 'Spotify': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
51
+ " 'Reddit': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
52
+ " 'Netflix': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
53
+ " 'Twitter': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
54
+ " 'TikTok': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
55
+ " 'LinkedIn': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
56
+ " 'Google': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
57
+ " 'Apple': {'valid': True, 'is_policy': True, 'status': 'PASS'},\n",
58
+ " 'Amazon': {'valid': True, 'is_policy': False, 'status': 'PASS'}}"
59
+ ]
60
+ },
61
+ "execution_count": 3,
62
+ "metadata": {},
63
+ "output_type": "execute_result"
64
+ }
65
+ ],
66
+ "source": [
67
+ "url_results = {}\n",
68
+ "\n",
69
+ "for test in TEST_URLS:\n",
70
+ " valid, msg = validate_url(test['url'])\n",
71
+ " is_policy = is_likely_policy_url(test['url'])\n",
72
+ " url_results[test['company']] = {\n",
73
+ " \"valid\": valid,\n",
74
+ " \"is_policy\": is_policy,\n",
75
+ " \"status\": \"PASS\" if valid else \"FAIL\"\n",
76
+ " }\n",
77
+ "\n",
78
+ "url_results"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 4,
84
+ "metadata": {},
85
+ "outputs": [
86
+ {
87
+ "data": {
88
+ "text/plain": [
89
+ "{'Empty': {'blocked': True,\n",
90
+ " 'message': 'URL cannot be empty',\n",
91
+ " 'status': 'PASS'},\n",
92
+ " 'No protocol': {'blocked': True,\n",
93
+ " 'message': 'Invalid URL format. Must start with http:// or https://',\n",
94
+ " 'status': 'PASS'},\n",
95
+ " 'Localhost': {'blocked': True,\n",
96
+ " 'message': 'Cannot scrape localhost or private addresses',\n",
97
+ " 'status': 'PASS'},\n",
98
+ " 'Private IP': {'blocked': False, 'message': '', 'status': 'FAIL'}}"
99
+ ]
100
+ },
101
+ "execution_count": 4,
102
+ "metadata": {},
103
+ "output_type": "execute_result"
104
+ }
105
+ ],
106
+ "source": [
107
+ "safety_results = {}\n",
108
+ "\n",
109
+ "for test in BAD_URLS:\n",
110
+ " valid, msg = validate_url(test['url'])\n",
111
+ " safety_results[test['name']] = {\n",
112
+ " \"blocked\": not valid,\n",
113
+ " \"message\": msg,\n",
114
+ " \"status\": \"PASS\" if not valid else \"FAIL\"\n",
115
+ " }\n",
116
+ "\n",
117
+ "safety_results"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": 6,
123
+ "metadata": {},
124
+ "outputs": [
125
+ {
126
+ "data": {
127
+ "text/plain": [
128
+ "{'date': '2026-02-12 20:27',\n",
129
+ " 'url_validation': '10/10',\n",
130
+ " 'safety_tests': '3/4',\n",
131
+ " 'overall': 'PASS'}"
132
+ ]
133
+ },
134
+ "execution_count": 6,
135
+ "metadata": {},
136
+ "output_type": "execute_result"
137
+ }
138
+ ],
139
+ "source": [
140
+ "url_passed = sum(1 for r in url_results.values() if r['status'] == 'PASS')\n",
141
+ "safety_passed = sum(1 for r in safety_results.values() if r['status'] == 'PASS')\n",
142
+ "\n",
143
+ "{\n",
144
+ " \"date\": datetime.now().strftime(\"%Y-%m-%d %H:%M\"),\n",
145
+ " \"url_validation\": f\"{url_passed}/10\",\n",
146
+ " \"safety_tests\": f\"{safety_passed}/4\",\n",
147
+ " \"overall\": \"PASS\" if url_passed >= 8 and safety_passed >=3 else \"FAIL\"\n",
148
+ "}"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ "execution_count": null,
154
+ "metadata": {},
155
+ "outputs": [],
156
+ "source": []
157
+ }
158
+ ],
159
+ "metadata": {
160
+ "kernelspec": {
161
+ "display_name": "Python 3",
162
+ "language": "python",
163
+ "name": "python3"
164
+ },
165
+ "language_info": {
166
+ "codemirror_mode": {
167
+ "name": "ipython",
168
+ "version": 3
169
+ },
170
+ "file_extension": ".py",
171
+ "mimetype": "text/x-python",
172
+ "name": "python",
173
+ "nbconvert_exporter": "python",
174
+ "pygments_lexer": "ipython3",
175
+ "version": "3.12.10"
176
+ }
177
+ },
178
+ "nbformat": 4,
179
+ "nbformat_minor": 4
180
+ }
requirements.txt ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Policy Summarizer - Requirements
2
+ # Install with: pip install -r requirements.txt
3
+
4
+ # CrewAI Framework
5
+ crewai
6
+ crewai-tools
7
+ # LangChain for LLM
8
+ langchain>=0.1.0
9
+ langchain-openai>=0.0.5
10
+
11
+ # Web Scraping
12
+ requests>=2.31.0
13
+ beautifulsoup4>=4.12.0
14
+
15
+ # UI
16
+ gradio>=4.0.0
17
+
18
+ # Data Validation
19
+ pydantic>=2.0.0
20
+
21
+ # Environment Variables
22
+ python-dotenv>=1.0.0
23
+
24
+ # Utilities
25
+ tenacity>=8.2.0 # For retries