WAQASCHANNA committed on
Commit
c7ddc0c
·
verified ·
1 Parent(s): 541879f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -32
app.py CHANGED
@@ -30,13 +30,6 @@ def detect_encoding(file):
30
def chunk_text(text, chunk_size=1000):
    """Split *text* into consecutive pieces of at most *chunk_size* characters.

    The final piece may be shorter than *chunk_size*; an empty string
    yields an empty list.
    """
    starts = range(0, len(text), chunk_size)
    return [text[start:start + chunk_size] for start in starts]
32
 
33
- # Function to classify text as law-related or not using zero-shot classification
34
- def classify_text(text):
35
- classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
36
- candidate_labels = ["law-related", "not law-related"]
37
- result = classifier(text[:512], candidate_labels=candidate_labels)
38
- return result['labels'][0] == "law-related"
39
-
40
  # Main area - Display content and perform tasks
41
  if uploaded_file is not None:
42
  try:
@@ -48,36 +41,30 @@ if uploaded_file is not None:
48
  uploaded_file.seek(0) # Reset file pointer to the beginning
49
  text = uploaded_file.read().decode(encoding)
50
 
51
- # Classify the text before proceeding with summarization or NER
52
- if classify_text(text):
53
- st.write("This document is classified as law-related.")
54
-
55
- # Chunk the text if it is too long
56
- chunks = chunk_text(text, chunk_size=1000)
57
 
58
- if task == "Summarization":
59
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
60
- summarized_text = ""
61
 
62
- # Summarize each chunk and combine the results
63
- for chunk in chunks:
64
- if len(chunk.split()) > min_length:
65
- summary = summarizer(chunk, max_length=max_length, min_length=min_length, do_sample=do_sample)
66
- summarized_text += summary[0]['summary_text'] + " "
67
 
68
- st.subheader("Summary:")
69
- st.write(summarized_text)
70
 
71
- elif task == "Named Entity Recognition (NER)":
72
- ner = pipeline("ner", grouped_entities=True, model="dslim/bert-base-NER")
73
- st.subheader("Named Entities:")
74
 
75
- for chunk in chunks:
76
- entities = ner(chunk)
77
- for entity in entities:
78
- st.write(f"{entity['entity_group']} - {entity['word']} (Score: {entity['score']:.2f})")
79
- else:
80
- st.warning("The uploaded document does not contain law-related content. Please upload a legal document.")
81
 
82
  except IndexError as e:
83
  st.error(f"IndexError: {e}. Ensure the text is long enough and parameters are set correctly.")
 
30
def chunk_text(text, chunk_size=1000):
    """Break *text* into a list of substrings no longer than *chunk_size*.

    Slices run back-to-back from the start of the string; the last slice
    holds whatever remains, and an empty input produces an empty list.
    """
    pieces = []
    offset = 0
    total = len(text)
    while offset < total:
        pieces.append(text[offset:offset + chunk_size])
        offset += chunk_size
    return pieces
32
 
 
 
 
 
 
 
 
33
  # Main area - Display content and perform tasks
34
  if uploaded_file is not None:
35
  try:
 
41
  uploaded_file.seek(0) # Reset file pointer to the beginning
42
  text = uploaded_file.read().decode(encoding)
43
 
44
+ # Chunk the text if it is too long
45
+ chunks = chunk_text(text, chunk_size=1000)
 
 
 
 
46
 
47
+ if task == "Summarization":
48
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
49
+ summarized_text = ""
50
 
51
+ # Summarize each chunk and combine the results
52
+ for chunk in chunks:
53
+ if len(chunk.split()) > min_length:
54
+ summary = summarizer(chunk, max_length=max_length, min_length=min_length, do_sample=do_sample)
55
+ summarized_text += summary[0]['summary_text'] + " "
56
 
57
+ st.subheader("Summary:")
58
+ st.write(summarized_text)
59
 
60
+ elif task == "Named Entity Recognition (NER)":
61
+ ner = pipeline("ner", grouped_entities=True, model="dslim/bert-base-NER")
62
+ st.subheader("Named Entities:")
63
 
64
+ for chunk in chunks:
65
+ entities = ner(chunk)
66
+ for entity in entities:
67
+ st.write(f"{entity['entity_group']} - {entity['word']} (Score: {entity['score']:.2f})")
 
 
68
 
69
  except IndexError as e:
70
  st.error(f"IndexError: {e}. Ensure the text is long enough and parameters are set correctly.")