File size: 3,667 Bytes
af8f925
32cccf7
 
 
af8f925
32cccf7
af8f925
 
 
07cbcbe
 
 
 
 
 
 
 
32cccf7
af8f925
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32cccf7
af8f925
 
 
32cccf7
af8f925
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32cccf7
af8f925
 
 
 
32cccf7
af8f925
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import streamlit as st
import fitz  # PyMuPDF
import docx
import nltk
import spacy
import networkx as nx
import matplotlib.pyplot as plt
from transformers import pipeline
from collections import Counter
import os

# Ensure spaCy model is available.
# Streamlit re-executes this script on every widget interaction, so the model
# is loaded through st.cache_resource to keep one instance per server process.
# show_spinner=False keeps the loader from emitting any UI element before
# st.set_page_config() is called further down.
@st.cache_resource(show_spinner=False)
def _load_spacy_model(model_name="en_core_web_lg"):
    """Return the spaCy pipeline *model_name*, downloading it if missing.

    Args:
        model_name: Name of the spaCy model package to load.

    Returns:
        The loaded spaCy ``Language`` object.

    Raises:
        subprocess.CalledProcessError: If the download command fails.
        OSError: If the model still cannot be loaded after the download.
    """
    import importlib
    import subprocess
    import sys

    try:
        return spacy.load(model_name)
    except OSError:
        # Use the interpreter that is running this app (not whatever "python"
        # resolves to on PATH) so the model lands in the same environment,
        # and fail loudly if the download does not succeed.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", model_name],
            check=True,
        )
        # The package was installed after interpreter start-up; refresh the
        # import system's directory caches so it can be found.
        importlib.invalidate_caches()
        return spacy.load(model_name)


nlp = _load_spacy_model()

# Load NLP Models
# Streamlit re-executes this script on every widget interaction; caching the
# loader means the pipelines are built once per server process instead of on
# every rerun. show_spinner=False keeps the loader from emitting any UI
# element before st.set_page_config() is called further down.
@st.cache_resource(show_spinner=False)
def _load_pipelines():
    """Download the NLTK "punkt" data and build the Hugging Face pipelines.

    Returns:
        A ``(summarizer, qa_pipeline)`` tuple: the summarization pipeline
        backed by "facebook/bart-large-cnn" and the default
        question-answering pipeline.
    """
    nltk.download("punkt")
    summarization = pipeline("summarization", model="facebook/bart-large-cnn")
    question_answering = pipeline("question-answering")
    return summarization, question_answering


summarizer, qa_pipeline = _load_pipelines()

# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the plain text of every page of a PDF, joined by newlines.

    Args:
        pdf_file: Binary file-like object exposing ``read()`` (for example a
            Streamlit uploaded file) that holds the PDF bytes.

    Returns:
        The text of all pages separated by ``"\n"``, or ``""`` when the
        stream yields no bytes.
    """
    pdf_bytes = pdf_file.read()
    if not pdf_bytes:
        # Nothing to parse; an empty stream has no text.
        return ""
    # The context manager closes the document (and frees its buffers) even
    # if text extraction raises part-way through.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)

# Function to extract text from DOCX
def extract_text_from_docx(docx_file):
    """Return the text of every paragraph in a DOCX file, joined by newlines.

    Args:
        docx_file: Path or file-like object accepted by ``docx.Document``.

    Returns:
        The paragraph texts separated by ``"\n"``.
    """
    paragraphs = docx.Document(docx_file).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)

# Summarization function
def summarize_text(text):
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    Args:
        text: Document text to summarize.

    Returns:
        The generated summary string, or ``""`` when *text* is empty or
        contains only whitespace.
    """
    if not text or not text.strip():
        # Nothing to summarize; skip the model call entirely.
        return ""
    # truncation=True lets the tokenizer cut inputs that exceed the model's
    # maximum input length instead of passing them through unmodified. Only
    # the leading part of a very long document is summarized as a result.
    result = summarizer(
        text,
        max_length=200,
        min_length=50,
        do_sample=False,
        truncation=True,
    )
    return result[0]["summary_text"]

# Q&A Function
def answer_question(text, question):
    """Answer *question* from *text* with the module-level ``qa_pipeline``.

    Args:
        text: Context the answer is extracted from.
        question: Natural-language question about the context.

    Returns:
        The ``"answer"`` field of the pipeline result, or ``""`` when either
        the context or the question is empty or whitespace-only.
    """
    if not text or not text.strip() or not question or not question.strip():
        # Without both a context and a question there is nothing to extract,
        # so return an empty answer rather than calling the model.
        return ""
    result = qa_pipeline(question=question, context=text)
    return result["answer"]

# Named Entity Recognition (NER)
def extract_entities(text):
    """Run the module-level spaCy pipeline on *text* and list its entities.

    Args:
        text: Text to analyse.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, one per entity in
        ``doc.ents``, in the order spaCy reports them.
    """
    pairs = []
    for span in nlp(text).ents:
        pairs.append((span.text, span.label_))
    return pairs

# Generate Mind Map
def generate_mind_map(text):
    """Draw the named entities of *text* as a graph and render it in Streamlit.

    Each distinct entity string becomes one node whose size is its mention
    count times 100. No edges are added, so the plot shows the entities
    positioned by the spring layout only.

    Args:
        text: Text to analyse with the module-level spaCy pipeline.

    Returns:
        None. The figure is written to the Streamlit page; when no entities
        are found an informational message is shown instead.
    """
    doc = nlp(text)
    entity_counts = Counter(ent.text for ent in doc.ents)
    if not entity_counts:
        st.info("No named entities were found to plot.")
        return

    G = nx.Graph()
    for entity, count in entity_counts.items():
        G.add_node(entity, size=count * 100)

    pos = nx.spring_layout(G)
    # Draw on an explicit figure rather than pyplot's global one, so the
    # figure passed to Streamlit is exactly the one built here.
    fig, ax = plt.subplots(figsize=(10, 7))
    node_sizes = [G.nodes[n]["size"] for n in G.nodes]
    nx.draw(
        G,
        pos,
        ax=ax,
        with_labels=True,
        node_size=node_sizes,
        node_color="skyblue",
    )
    ax.set_title("Mind Map of Entities")
    try:
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated clicks do not accumulate figures in memory.
        plt.close(fig)

# Streamlit UI
# Page layout: title, uploader, then one section per feature (summary, Q&A,
# entity list, mind map). Streamlit re-runs this script on every interaction,
# so each section is gated by its own widget state.
st.set_page_config(page_title="Legal Document Summarizer & Query System", layout="wide")
st.title("📜 Legal Document Summarization, NER & Mind Map System")
st.markdown("""Upload a legal document, get a summary, extract entities, and generate a mind map!""")

# File uploader
uploaded_file = st.file_uploader("Upload a PDF or DOCX", type=["pdf", "docx"])

if uploaded_file:
    # The MIME type is reported by the client and may be missing or generic,
    # so fall back to the file extension when it is not one of the two
    # expected values.
    mime_type = uploaded_file.type
    if mime_type == "application/pdf":
        file_kind = "pdf"
    elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        file_kind = "docx"
    else:
        file_kind = os.path.splitext(uploaded_file.name)[1].lower().lstrip(".")

    if file_kind == "pdf":
        document_text = extract_text_from_pdf(uploaded_file)
    elif file_kind == "docx":
        document_text = extract_text_from_docx(uploaded_file)
    else:
        st.error("Unsupported file format!")
        st.stop()

    # Documents without a text layer (e.g. scanned PDFs) extract to nothing;
    # stop here instead of sending empty input to the models below.
    if not document_text.strip():
        st.warning("No extractable text was found in this document.")
        st.stop()

    st.subheader("Extracted Text Preview")
    # Only the first 2000 characters are shown to keep the preview small.
    st.text_area("Document Content", document_text[:2000], height=250)

    # Summarization
    if st.button("Summarize Document"):
        summary = summarize_text(document_text)
        st.subheader("📌 Summary")
        st.success(summary)

    # Question Answering
    user_question = st.text_input("Ask a question about the document:")
    if user_question:
        answer = answer_question(document_text, user_question)
        st.subheader("📝 Answer")
        st.info(answer)

    # Named Entity Recognition
    if st.button("Extract Entities"):
        entities = extract_entities(document_text)
        st.subheader("📌 Named Entities")
        for entity, label in entities:
            st.write(f"**{entity}** - {label}")

    # Mind Map Generation
    if st.button("Generate Mind Map"):
        st.subheader("🧠 Mind Map of Entities")
        generate_mind_map(document_text)

st.markdown("---")
st.caption("🚀 Built with Hugging Face, spaCy, and Streamlit")