Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -108,7 +108,6 @@ def main():
|
|
| 108 |
for page in reader.pages:
|
| 109 |
text = page.extract_text()
|
| 110 |
text_list.append(text)
|
| 111 |
-
st.write('PDF pages read')
|
| 112 |
else:
|
| 113 |
st.error("Please upload your own PDF to be analyzed")
|
| 114 |
st.stop()
|
|
@@ -118,20 +117,20 @@ def main():
|
|
| 118 |
|
| 119 |
sentences = nltk.sent_tokenize(text_list_final)
|
| 120 |
|
| 121 |
-
st.write('tokeznization completed')
|
| 122 |
result =[]
|
| 123 |
for i in sentences:
|
| 124 |
result1 = i.lower()
|
| 125 |
result2 = re.sub(r'[^\w\s]','',result1)
|
| 126 |
result.append(result2)
|
| 127 |
|
| 128 |
-
|
| 129 |
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") #bert-base-uncased
|
| 130 |
|
| 131 |
model_path = "checkpoint-2850"
|
| 132 |
|
| 133 |
model = AutoModelForSequenceClassification.from_pretrained(model_path,id2label={0:'non-causal',1:'causal'})
|
| 134 |
|
|
|
|
| 135 |
pipe1 = pipeline("text-classification", model=model,tokenizer=tokenizer)
|
| 136 |
for sent in result:
|
| 137 |
pred = pipe1(sent)
|
|
|
|
| 108 |
for page in reader.pages:
|
| 109 |
text = page.extract_text()
|
| 110 |
text_list.append(text)
|
|
|
|
| 111 |
else:
|
| 112 |
st.error("Please upload your own PDF to be analyzed")
|
| 113 |
st.stop()
|
|
|
|
| 117 |
|
| 118 |
sentences = nltk.sent_tokenize(text_list_final)
|
| 119 |
|
|
|
|
| 120 |
result =[]
|
| 121 |
for i in sentences:
|
| 122 |
result1 = i.lower()
|
| 123 |
result2 = re.sub(r'[^\w\s]','',result1)
|
| 124 |
result.append(result2)
|
| 125 |
|
| 126 |
+
st.write("--- %s seconds ---" % (time.time() - start_time))
|
| 127 |
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") #bert-base-uncased
|
| 128 |
|
| 129 |
model_path = "checkpoint-2850"
|
| 130 |
|
| 131 |
model = AutoModelForSequenceClassification.from_pretrained(model_path,id2label={0:'non-causal',1:'causal'})
|
| 132 |
|
| 133 |
+
st.write('base sequence classification loaded')
|
| 134 |
pipe1 = pipeline("text-classification", model=model,tokenizer=tokenizer)
|
| 135 |
for sent in result:
|
| 136 |
pred = pipe1(sent)
|