Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,13 +3,12 @@ from transformers import pipeline
|
|
| 3 |
import pdfplumber
|
| 4 |
from PIL import Image
|
| 5 |
import easyocr
|
| 6 |
-
from langdetect import detect
|
| 7 |
|
| 8 |
# Initialize Models
|
| 9 |
@st.cache_resource
|
| 10 |
def initialize_models():
|
| 11 |
return {
|
| 12 |
-
"report_check_model": pipeline("
|
| 13 |
"sentiment_model": pipeline("sentiment-analysis"),
|
| 14 |
"summarize_model": pipeline("summarization", model="facebook/bart-large-cnn"),
|
| 15 |
"translation_model": {
|
|
@@ -34,6 +33,10 @@ def extract_text_from_image(image_file):
|
|
| 34 |
result = reader.readtext(image, detail=0) # `detail=0` returns only the text
|
| 35 |
return " ".join(result).strip()
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
# Check if content is a lab report
|
| 38 |
def is_lab_report(text, model):
|
| 39 |
result = model(text, candidate_labels=["lab report", "not lab report"])
|
|
@@ -81,25 +84,29 @@ def main():
|
|
| 81 |
extracted_text = uploaded_file.read().decode("utf-8")
|
| 82 |
else:
|
| 83 |
st.error("Unsupported file type.")
|
|
|
|
| 84 |
|
| 85 |
if extracted_text:
|
| 86 |
st.subheader("Extracted Content")
|
| 87 |
st.text_area("Extracted Text", extracted_text, height=200)
|
| 88 |
|
|
|
|
|
|
|
|
|
|
| 89 |
# Check if it's a lab report
|
| 90 |
-
if not is_lab_report(
|
| 91 |
st.error("The uploaded file does not appear to be a lab report.")
|
| 92 |
return
|
| 93 |
|
| 94 |
st.success("The uploaded file is a valid lab report.")
|
| 95 |
|
| 96 |
# Sentiment Analysis
|
| 97 |
-
sentiment, confidence = analyze_sentiment(
|
| 98 |
st.subheader("Sentiment Analysis")
|
| 99 |
st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")
|
| 100 |
|
| 101 |
# Summarization
|
| 102 |
-
summary = summarize_content(
|
| 103 |
st.subheader("Summary")
|
| 104 |
st.text_area("Summary", summary, height=150)
|
| 105 |
|
|
@@ -114,4 +121,4 @@ def main():
|
|
| 114 |
st.error("Could not extract text from the uploaded file.")
|
| 115 |
|
| 116 |
if __name__ == "__main__":
|
| 117 |
-
main()
|
|
|
|
| 3 |
import pdfplumber
|
| 4 |
from PIL import Image
|
| 5 |
import easyocr
|
|
|
|
| 6 |
|
| 7 |
# Initialize Models
|
| 8 |
@st.cache_resource
|
| 9 |
def initialize_models():
|
| 10 |
return {
|
| 11 |
+
"report_check_model": pipeline("zero-shot-classification", model="facebook/bart-large-mnli"),
|
| 12 |
"sentiment_model": pipeline("sentiment-analysis"),
|
| 13 |
"summarize_model": pipeline("summarization", model="facebook/bart-large-cnn"),
|
| 14 |
"translation_model": {
|
|
|
|
| 33 |
result = reader.readtext(image, detail=0) # `detail=0` returns only the text
|
| 34 |
return " ".join(result).strip()
|
| 35 |
|
| 36 |
+
# Preprocess text for model input
|
| 37 |
+
def preprocess_text(text, max_length=1024):
    """Truncate *text* to at most *max_length* characters for model input.

    The limit is counted in characters, not model tokens.
    NOTE(review): the default of 1024 presumably keeps inputs within the
    downstream models' size limits -- confirm against the models in use.

    Args:
        text: The extracted text. ``None`` or an empty string yields ``""``.
        max_length: Maximum number of characters to keep. Must be >= 0.

    Returns:
        ``text`` unchanged when it already fits, otherwise its first
        ``max_length`` characters.

    Raises:
        ValueError: If ``max_length`` is negative.
    """
    # A negative bound would make the slice below drop characters from the
    # *end* of the text (text[:-n]) rather than enforce a limit, so reject it.
    if max_length < 0:
        raise ValueError("max_length must be non-negative")

    # Treat missing/empty input as empty text instead of failing on None.
    if not text:
        return ""

    # Slicing already returns the whole string when it is shorter than the
    # bound, so no explicit length comparison is needed.
    return text[:max_length]
|
| 39 |
+
|
| 40 |
# Check if content is a lab report
|
| 41 |
def is_lab_report(text, model):
|
| 42 |
result = model(text, candidate_labels=["lab report", "not lab report"])
|
|
|
|
| 84 |
extracted_text = uploaded_file.read().decode("utf-8")
|
| 85 |
else:
|
| 86 |
st.error("Unsupported file type.")
|
| 87 |
+
return
|
| 88 |
|
| 89 |
if extracted_text:
|
| 90 |
st.subheader("Extracted Content")
|
| 91 |
st.text_area("Extracted Text", extracted_text, height=200)
|
| 92 |
|
| 93 |
+
# Preprocess text
|
| 94 |
+
preprocessed_text = preprocess_text(extracted_text)
|
| 95 |
+
|
| 96 |
# Check if it's a lab report
|
| 97 |
+
if not is_lab_report(preprocessed_text, models["report_check_model"]):
|
| 98 |
st.error("The uploaded file does not appear to be a lab report.")
|
| 99 |
return
|
| 100 |
|
| 101 |
st.success("The uploaded file is a valid lab report.")
|
| 102 |
|
| 103 |
# Sentiment Analysis
|
| 104 |
+
sentiment, confidence = analyze_sentiment(preprocessed_text, models["sentiment_model"])
|
| 105 |
st.subheader("Sentiment Analysis")
|
| 106 |
st.write(f"**Sentiment**: {sentiment} (Confidence: {confidence:.2f})")
|
| 107 |
|
| 108 |
# Summarization
|
| 109 |
+
summary = summarize_content(preprocessed_text, models["summarize_model"])
|
| 110 |
st.subheader("Summary")
|
| 111 |
st.text_area("Summary", summary, height=150)
|
| 112 |
|
|
|
|
| 121 |
st.error("Could not extract text from the uploaded file.")
|
| 122 |
|
| 123 |
if __name__ == "__main__":
|
| 124 |
+
main()
|