WAQASCHANNA committed on
Commit
c7ddc0c
·
verified ·
1 Parent(s): 541879f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -32
app.py CHANGED
@@ -30,13 +30,6 @@ def detect_encoding(file):
30
def chunk_text(text, chunk_size=1000):
    """Split *text* into consecutive pieces of at most *chunk_size* characters.

    The final piece may be shorter than *chunk_size*; an empty string
    yields an empty list.
    """
    starts = range(0, len(text), chunk_size)
    return [text[start:start + chunk_size] for start in starts]
32
 
33
- # Function to classify text as law-related or not using zero-shot classification
34
- def classify_text(text):
35
- classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
36
- candidate_labels = ["law-related", "not law-related"]
37
- result = classifier(text[:512], candidate_labels=candidate_labels)
38
- return result['labels'][0] == "law-related"
39
-
40
  # Main area - Display content and perform tasks
41
  if uploaded_file is not None:
42
  try:
@@ -48,36 +41,30 @@ if uploaded_file is not None:
48
  uploaded_file.seek(0) # Reset file pointer to the beginning
49
  text = uploaded_file.read().decode(encoding)
50
 
51
- # Classify the text before proceeding with summarization or NER
52
- if classify_text(text):
53
- st.write("This document is classified as law-related.")
54
-
55
- # Chunk the text if it is too long
56
- chunks = chunk_text(text, chunk_size=1000)
57
 
58
- if task == "Summarization":
59
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
60
- summarized_text = ""
61
 
62
- # Summarize each chunk and combine the results
63
- for chunk in chunks:
64
- if len(chunk.split()) > min_length:
65
- summary = summarizer(chunk, max_length=max_length, min_length=min_length, do_sample=do_sample)
66
- summarized_text += summary[0]['summary_text'] + " "
67
 
68
- st.subheader("Summary:")
69
- st.write(summarized_text)
70
 
71
- elif task == "Named Entity Recognition (NER)":
72
- ner = pipeline("ner", grouped_entities=True, model="dslim/bert-base-NER")
73
- st.subheader("Named Entities:")
74
 
75
- for chunk in chunks:
76
- entities = ner(chunk)
77
- for entity in entities:
78
- st.write(f"{entity['entity_group']} - {entity['word']} (Score: {entity['score']:.2f})")
79
- else:
80
- st.warning("The uploaded document does not contain law-related content. Please upload a legal document.")
81
 
82
  except IndexError as e:
83
  st.error(f"IndexError: {e}. Ensure the text is long enough and parameters are set correctly.")
 
30
def chunk_text(text, chunk_size=1000):
    """Break *text* into a list of substrings no longer than *chunk_size*.

    Slices run back-to-back from the start of the string; the last slice
    holds whatever remains, and an empty input produces an empty list.
    """
    pieces = []
    offset = 0
    total = len(text)
    while offset < total:
        pieces.append(text[offset:offset + chunk_size])
        offset += chunk_size
    return pieces
32
 
 
 
 
 
 
 
 
33
  # Main area - Display content and perform tasks
34
  if uploaded_file is not None:
35
  try:
 
41
  uploaded_file.seek(0) # Reset file pointer to the beginning
42
  text = uploaded_file.read().decode(encoding)
43
 
44
+ # Chunk the text if it is too long
45
+ chunks = chunk_text(text, chunk_size=1000)
 
 
 
 
46
 
47
+ if task == "Summarization":
48
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
49
+ summarized_text = ""
50
 
51
+ # Summarize each chunk and combine the results
52
+ for chunk in chunks:
53
+ if len(chunk.split()) > min_length:
54
+ summary = summarizer(chunk, max_length=max_length, min_length=min_length, do_sample=do_sample)
55
+ summarized_text += summary[0]['summary_text'] + " "
56
 
57
+ st.subheader("Summary:")
58
+ st.write(summarized_text)
59
 
60
+ elif task == "Named Entity Recognition (NER)":
61
+ ner = pipeline("ner", grouped_entities=True, model="dslim/bert-base-NER")
62
+ st.subheader("Named Entities:")
63
 
64
+ for chunk in chunks:
65
+ entities = ner(chunk)
66
+ for entity in entities:
67
+ st.write(f"{entity['entity_group']} - {entity['word']} (Score: {entity['score']:.2f})")
 
 
68
 
69
  except IndexError as e:
70
  st.error(f"IndexError: {e}. Ensure the text is long enough and parameters are set correctly.")