Spaces:

WAQASCHANNA
/

Hackathon_Generative_AI

Sleeping

App Files Files Community

WAQASCHANNA committed on Aug 31, 2024

Commit

b3a0493

verified ·

1 Parent(s): ba11a78

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -0

app.py CHANGED Viewed

	@@ -0,0 +1,99 @@

+import streamlit as st
+from transformers import pipeline
+import chardet
+st.title("Legal Document Analysis")  # Page heading
+# Sidebar: upload widget for the document to analyse (only .txt files are accepted)
+st.sidebar.header("Upload Document")
+uploaded_file = st.sidebar.file_uploader("Choose a document", type=["txt"])  # Checked against None before use further down
+# Sidebar: choice of the analysis task to run on the uploaded document
+st.sidebar.header("Select Task")
+task = st.sidebar.selectbox("Choose the task you want to perform:", ("Summarization", "Named Entity Recognition (NER)"))
+# Sidebar: summarization parameters; these names are shown and defined only when summarization is selected
+if task == "Summarization":
+    st.sidebar.header("Summarization Parameters")
+    max_length = st.sidebar.slider("Max Length", min_value=50, max_value=500, value=150)  # Forwarded to the summarizer as max_length
+    min_length = st.sidebar.slider("Min Length", min_value=10, max_value=100, value=40)  # Forwarded as min_length; also the word count a chunk must exceed to be summarized
+    do_sample = st.sidebar.checkbox("Use Sampling", value=False)  # Forwarded to the summarizer as do_sample
+# Function to detect file encoding
+def detect_encoding(file):
+    raw_data = file.read(1024)  # Read a small chunk of the file
+    result = chardet.detect(raw_data)
+    encoding = result['encoding']
+    return encoding
+# Function to split text into chunks
+def chunk_text(text, chunk_size=1000):
+    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
+# Function to classify text as law-related or not using zero-shot classification
+def classify_text(text):
+    # Load the zero-shot classification pipeline
+    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+    # Define the candidate labels
+    candidate_labels = ["law-related", "not law-related"]
+    # Run the classifier with the candidate labels
+    result = classifier(text[:512], candidate_labels=candidate_labels)
+    st.write(f"Classification result: {result}")
+    # Check if the highest-scoring label is "law-related"
+    return result['labels'][0] == "law-related"
+# Main area - Display content and perform tasks
+if uploaded_file is not None:
+    try:
+        # Detect and decode the file content
+        encoding = detect_encoding(uploaded_file)
+        if encoding is None:
+            encoding = 'utf-8'  # Fallback to default encoding
+        uploaded_file.seek(0)  # Reset file pointer to the beginning
+        text = uploaded_file.read().decode(encoding)
+        st.write("File content loaded successfully!")  # Debugging: Confirm file loading
+        # Classify the text
+        if classify_text(text):
+            st.write("This document is classified as law-related.")  # Debugging: Confirm classification
+            chunks = chunk_text(text, chunk_size=1000)
+            if task == "Summarization":
+                summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+                summarized_text = ""
+                # Summarize each chunk and combine the results
+                for chunk in chunks:
+                    if len(chunk.split()) > min_length:
+                        summary = summarizer(chunk, max_length=max_length, min_length=min_length, do_sample=do_sample)
+                        summarized_text += summary[0]['summary_text'] + " "
+                st.subheader("Summary:")
+                st.write(summarized_text)
+            elif task == "Named Entity Recognition (NER)":
+                ner = pipeline("ner", grouped_entities=True, model="dslim/bert-base-NER")
+                st.subheader("Named Entities:")
+                for chunk in chunks:
+                    entities = ner(chunk)
+                    for entity in entities:
+                        st.write(f"{entity['entity_group']} - {entity['word']} (Score: {entity['score']:.2f})")
+        else:
+            st.warning("The uploaded document does not contain law-related content. Please upload a legal document.")
+    except IndexError as e:
+        st.error(f"IndexError: {e}. Ensure the text is long enough and parameters are set correctly.")
+    except UnicodeDecodeError as e:
+        st.error(f"Encoding error: {e}. Please upload a file with valid encoding.")
+    except Exception as e:
+        st.error(f"An unexpected error occurred: {e}")
+else:
+    st.info("Please upload a document to analyze.")