Spaces:

moseleydev
/

Medical-Report-Extractive-Summarizer

Sleeping

App Files Files Community

moseleydev committed 14 days ago

Commit

a30a7a5

verified ·

1 Parent(s): 3142c97

Update main.py

Browse files

Files changed (1) hide show

main.py +16 -4

main.py CHANGED Viewed

@@ -6,6 +6,7 @@ from sklearn.cluster import KMeans
 import torch
 import numpy as np
 import spacy
 import time
 app = FastAPI(
@@ -15,7 +16,7 @@ app = FastAPI(
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
     allow_methods=["*"],
     allow_headers=["*"],
 )
@@ -28,6 +29,14 @@ class ReportRequest(BaseModel):
     text: str
     num_sentences: int = 3
 @app.post("/api/summarize")
 def summarize_medical_report(request: ReportRequest):
     start_time = time.time()
@@ -35,23 +44,26 @@ def summarize_medical_report(request: ReportRequest):
     global tokenizer, model, nlp
     if model is None:
         print("Initializing SciBERT and SpaCy... This takes a moment.")
         # Load SciBERT
         model_name = "allenai/scibert_scivocab_uncased"
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         model = AutoModel.from_pretrained(model_name)
         try:
             nlp = spacy.load("en_core_web_sm")
         except OSError:
-            import spacy.cli
             spacy.cli.download("en_core_web_sm")
             nlp = spacy.load("en_core_web_sm")
         print("Models loaded successfully!")
     # 1. Safely split text into sentences using SpaCy NLP
     doc = nlp(request.text)
     sentences = [sent.text.strip() for sent in doc.sents if len(sent.text.strip()) > 5]
-    # Edge case: Report is too short
     if len(sentences) <= request.num_sentences:
         return {"summary": request.text, "metadata": {"status": "too_short"}}
@@ -76,7 +88,7 @@ def summarize_medical_report(request: ReportRequest):
         idx = np.argmin(np.linalg.norm(embeddings - kmeans.cluster_centers_[i], axis=1))
         avg.append(idx)
-    # 4. Sort indices chronologically to maintain report flow
     avg = sorted(list(set(avg)))
     final_summary = " ".join([sentences[i] for i in avg])

 import torch
 import numpy as np
 import spacy
+import spacy.cli
 import time
 app = FastAPI(
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],
     allow_methods=["*"],
     allow_headers=["*"],
 )
     text: str
     num_sentences: int = 3
+@app.get("/")
+def health_check():
+    return {
+        "status": "Engine is running",
+        "message": "Send POST requests to /api/summarize",
+        "docs": "Visit /docs for the Swagger UI"
+    }
 @app.post("/api/summarize")
 def summarize_medical_report(request: ReportRequest):
     start_time = time.time()
     global tokenizer, model, nlp
     if model is None:
         print("Initializing SciBERT and SpaCy... This takes a moment.")
         # Load SciBERT
         model_name = "allenai/scibert_scivocab_uncased"
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         model = AutoModel.from_pretrained(model_name)
         try:
             nlp = spacy.load("en_core_web_sm")
         except OSError:
+            print("Downloading SpaCy English model...")
             spacy.cli.download("en_core_web_sm")
             nlp = spacy.load("en_core_web_sm")
         print("Models loaded successfully!")
     # 1. Safely split text into sentences using SpaCy NLP
     doc = nlp(request.text)
     sentences = [sent.text.strip() for sent in doc.sents if len(sent.text.strip()) > 5]
+    # Edge case: Report is too short to summarize
     if len(sentences) <= request.num_sentences:
         return {"summary": request.text, "metadata": {"status": "too_short"}}
         idx = np.argmin(np.linalg.norm(embeddings - kmeans.cluster_centers_[i], axis=1))
         avg.append(idx)
+    # 4. Sort indices chronologically to maintain original report flow
     avg = sorted(list(set(avg)))
     final_summary = " ".join([sentences[i] for i in avg])