moseleydev committed on
Commit
a30a7a5
·
verified ·
1 Parent(s): 3142c97

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +16 -4
main.py CHANGED
@@ -6,6 +6,7 @@ from sklearn.cluster import KMeans
6
  import torch
7
  import numpy as np
8
  import spacy
 
9
  import time
10
 
11
  app = FastAPI(
@@ -15,7 +16,7 @@ app = FastAPI(
15
 
16
  app.add_middleware(
17
  CORSMiddleware,
18
- allow_origins=["*"],
19
  allow_methods=["*"],
20
  allow_headers=["*"],
21
  )
@@ -28,6 +29,14 @@ class ReportRequest(BaseModel):
28
  text: str
29
  num_sentences: int = 3
30
 
 
 
 
 
 
 
 
 
31
  @app.post("/api/summarize")
32
  def summarize_medical_report(request: ReportRequest):
33
  start_time = time.time()
@@ -35,23 +44,26 @@ def summarize_medical_report(request: ReportRequest):
35
  global tokenizer, model, nlp
36
  if model is None:
37
  print("Initializing SciBERT and SpaCy... This takes a moment.")
 
38
  # Load SciBERT
39
  model_name = "allenai/scibert_scivocab_uncased"
40
  tokenizer = AutoTokenizer.from_pretrained(model_name)
41
  model = AutoModel.from_pretrained(model_name)
 
42
  try:
43
  nlp = spacy.load("en_core_web_sm")
44
  except OSError:
45
- import spacy.cli
46
  spacy.cli.download("en_core_web_sm")
47
  nlp = spacy.load("en_core_web_sm")
 
48
  print("Models loaded successfully!")
49
 
50
  # 1. Safely split text into sentences using SpaCy NLP
51
  doc = nlp(request.text)
52
  sentences = [sent.text.strip() for sent in doc.sents if len(sent.text.strip()) > 5]
53
 
54
- # Edge case: Report is too short
55
  if len(sentences) <= request.num_sentences:
56
  return {"summary": request.text, "metadata": {"status": "too_short"}}
57
 
@@ -76,7 +88,7 @@ def summarize_medical_report(request: ReportRequest):
76
  idx = np.argmin(np.linalg.norm(embeddings - kmeans.cluster_centers_[i], axis=1))
77
  avg.append(idx)
78
 
79
- # 4. Sort indices chronologically to maintain report flow
80
  avg = sorted(list(set(avg)))
81
  final_summary = " ".join([sentences[i] for i in avg])
82
 
 
6
  import torch
7
  import numpy as np
8
  import spacy
9
+ import spacy.cli
10
  import time
11
 
12
  app = FastAPI(
 
16
 
17
  app.add_middleware(
18
  CORSMiddleware,
19
+ allow_origins=["*"],
20
  allow_methods=["*"],
21
  allow_headers=["*"],
22
  )
 
29
  text: str
30
  num_sentences: int = 3
31
 
32
+ @app.get("/")
33
+ def health_check():
34
+ return {
35
+ "status": "Engine is running",
36
+ "message": "Send POST requests to /api/summarize",
37
+ "docs": "Visit /docs for the Swagger UI"
38
+ }
39
+
40
  @app.post("/api/summarize")
41
  def summarize_medical_report(request: ReportRequest):
42
  start_time = time.time()
 
44
  global tokenizer, model, nlp
45
  if model is None:
46
  print("Initializing SciBERT and SpaCy... This takes a moment.")
47
+
48
  # Load SciBERT
49
  model_name = "allenai/scibert_scivocab_uncased"
50
  tokenizer = AutoTokenizer.from_pretrained(model_name)
51
  model = AutoModel.from_pretrained(model_name)
52
+
53
  try:
54
  nlp = spacy.load("en_core_web_sm")
55
  except OSError:
56
+ print("Downloading SpaCy English model...")
57
  spacy.cli.download("en_core_web_sm")
58
  nlp = spacy.load("en_core_web_sm")
59
+
60
  print("Models loaded successfully!")
61
 
62
  # 1. Safely split text into sentences using SpaCy NLP
63
  doc = nlp(request.text)
64
  sentences = [sent.text.strip() for sent in doc.sents if len(sent.text.strip()) > 5]
65
 
66
+ # Edge case: Report is too short to summarize
67
  if len(sentences) <= request.num_sentences:
68
  return {"summary": request.text, "metadata": {"status": "too_short"}}
69
 
 
88
  idx = np.argmin(np.linalg.norm(embeddings - kmeans.cluster_centers_[i], axis=1))
89
  avg.append(idx)
90
 
91
+ # 4. Sort indices chronologically to maintain original report flow
92
  avg = sorted(list(set(avg)))
93
  final_summary = " ".join([sentences[i] for i in avg])
94