glitz-dev committed on
Commit
4466fc1
·
1 Parent(s): 1fd1e18

Added get_annotation with text and context parameters

Browse files
Files changed (4) hide show
  1. biomed_annotator.py +326 -0
  2. hipaathesis.py +47 -26
  3. requirements.txt +14 -12
  4. test_annotations_api.py +20 -0
biomed_annotator.py ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import json
3
+ import re
4
+ import httpx
5
+ from typing import Optional, List, Literal, Any, Dict
6
+ from pydantic import BaseModel
7
+ from tenacity import retry, stop_after_attempt, wait_exponential
8
+
9
+ # --- 1. Schemas ---
10
+
11
# Closed set of question types the generator is allowed to emit.
QuestionCategory = Literal["Clinical", "Mechanism", "Evidence", "Methods", "Limitations", "NextStep"]


class GeneratedQuestion(BaseModel):
    """A single generated reading question, grounded in the source text.

    Fields:
        category: which kind of question this is (see QuestionCategory).
        question: the question text itself.
        evidence_quote: verbatim substring of the provided context that
            supports/motivates the question.
    """
    category: QuestionCategory
    question: str
    evidence_quote: str
17
+
18
+ import os
19
+ from dotenv import load_dotenv
20
+
21
+ load_dotenv()
22
+
23
+ # --- 2. Configuration ---
24
+
25
def get_hf_token_from_cache() -> str:
    """Get HuggingFace token from local cache (from huggingface-cli login).

    Returns:
        The cached token, or "" when huggingface_hub is not installed,
        no token is cached, or the lookup fails for any other reason.
    """
    try:
        from huggingface_hub import HfFolder
        token = HfFolder.get_token()
        if token:
            print("[DEBUG] Found HuggingFace token from local cache")
            return token
    except ImportError:
        print("[DEBUG] huggingface_hub not installed, cannot get token from cache")
    except Exception as e:
        print(f"[DEBUG] Could not get HF token from cache: {e}")
    return ""


def _env_int(name: str, default: int) -> int:
    """Read an integer environment variable, tolerating unset or malformed values.

    The previous inline `int(os.getenv(...))` crashed at import time when the
    variable was set to an empty or non-numeric string; here we log and fall
    back to the default instead.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        print(f"[DEBUG] Ignoring non-integer {name}={raw!r}; using default {default}")
        return default


class Settings:
    """Runtime configuration, resolved once from environment variables."""

    def __init__(self):
        # LLM Provider: 'ollama', 'openai_compat', or 'huggingface'
        self.llm_provider: str = os.getenv("LLM_PROVIDER", "huggingface")

        # Ollama settings
        self.ollama_base_url: str = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
        self.ollama_model: str = os.getenv("OLLAMA_MODEL", "llama3.2")  # alternatives: qwen2.5:3b-instruct, llama3.2
        self.ollama_timeout_s: int = _env_int("OLLAMA_TIMEOUT_S", 300)

        # OpenAI-compatible server settings
        self.openai_compat_base_url: str = os.getenv("OPENAI_COMPAT_BASE_URL", "http://localhost:8080/v1")
        self.openai_compat_model: str = os.getenv("OPENAI_COMPAT_MODEL", "gpt-4o")
        self.openai_compat_api_key: str = os.getenv("OPENAI_COMPAT_API_KEY", "not-needed")
        self.openai_compat_timeout_s: int = _env_int("OPENAI_COMPAT_TIMEOUT_S", 120)

        # HuggingFace Serverless Inference settings
        self.hf_model: str = os.getenv("HF_MODEL", "microsoft/Phi-3-mini-4k-instruct")
        # Try env var first, then fall back to the huggingface-cli login cache.
        self.hf_api_key: str = os.getenv("HF_API_KEY", "") or get_hf_token_from_cache()

        # Generation settings
        self.max_output_questions: int = _env_int("MAX_OUTPUT_QUESTIONS", 6)


# Module-level singleton used as the default configuration everywhere below.
settings = Settings()
64
+
65
+ # --- 3. Prompts ---
66
+
67
SYSTEM_PROMPT = (
    "You are a biomedical paper reading assistant. "
    "Only use the provided text. Do not add external facts. "
    "Every question MUST include an evidence_quote copied verbatim from the provided text."
)


def build_question_prompt(selected_text: str, context_text: str | None, section_title: str | None, page_start: int | None, page_end: int | None) -> str:
    """Build the user prompt that asks the LLM for grounded questions.

    Args:
        selected_text: the text the user highlighted.
        context_text: surrounding text used for grounding; when empty or
            None we fall back to the highlight itself.
        section_title: optional section name included as excerpt metadata.
        page_start / page_end: optional page range included as metadata.

    Returns:
        A single prompt string with metadata, the highlight, the grounding
        context, and strict-JSON output instructions.
    """
    metadata_lines = []
    if section_title:
        metadata_lines.append(f"Section: {section_title}")
    if page_start is not None:
        metadata_lines.append(f"Pages: {page_start}-{page_end or page_start}")
    meta_block = "\n".join(metadata_lines) if metadata_lines else "Section: Unknown"

    grounding = (context_text or "").strip()
    if not grounding:
        # No usable surrounding context; ground on the highlight itself.
        grounding = selected_text.strip()

    max_q = settings.max_output_questions

    return f"""Task: Generate good questions from this paper excerpt.

Excerpt metadata:
{meta_block}

Highlighted text:
{selected_text.strip()}

Context (use this for grounding; do not go beyond it):
{grounding}

Output STRICT JSON with this schema:
{{
"questions": [
{{
"category": "Clinical|Mechanism|Evidence|Methods|Limitations|NextStep",
"question": "...",
"evidence_quote": "..."
}}
]
}}

Rules:
- Output {max_q} questions.
- Questions must be specific and actionable.
- evidence_quote MUST be a verbatim substring from the Context text.
"""
114
+
115
+ # --- 4. LLM Clients ---
116
+
117
class LLMError(RuntimeError):
    """Raised when an LLM backend fails to produce a response."""


class BaseLLM:
    """Minimal interface every LLM backend implements."""

    def generate_json(self, system_prompt: str, user_prompt: str) -> str:
        """Return the model's raw (ideally JSON) completion for the prompts."""
        raise NotImplementedError
123
+
124
class OllamaLLM(BaseLLM):
    """Backend that talks to a local Ollama server via /api/generate."""

    def __init__(self, cfg: Settings):
        # Normalize the base URL so the path join below is predictable.
        self.base_url = cfg.ollama_base_url.rstrip("/")
        self.model = cfg.ollama_model
        self.timeout = cfg.ollama_timeout_s

    @retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=0.5, min=0.5, max=2))
    def generate_json(self, system_prompt: str, user_prompt: str) -> str:
        """POST the prompts to Ollama and return the raw response text.

        Raises:
            LLMError: on timeout or any other transport/HTTP failure.
        """
        url = f"{self.base_url}/api/generate"
        request_body = {
            "model": self.model,
            "prompt": user_prompt,
            "system": system_prompt,
            "format": "json",
            "stream": False,
            "options": {"temperature": 0.4, "top_p": 0.9, "num_predict": 700}
        }
        print(f"[DEBUG] Ollama request to {url} with model={self.model}")
        try:
            with httpx.Client(timeout=self.timeout) as client:
                r = client.post(url, json=request_body)
                print(f"[DEBUG] Ollama response status: {r.status_code}")
                if r.status_code != 200:
                    # Surface the server's error body before raise_for_status hides it.
                    print(f"[DEBUG] Ollama error response: {r.text}")
                r.raise_for_status()
                return r.json().get("response", "").strip()
        except httpx.TimeoutException as e:
            print(f"[DEBUG] Ollama timeout: {e}")
            raise LLMError(f"Ollama generate timed out after {self.timeout}s: {e}")
        except Exception as e:
            print(f"[DEBUG] Ollama exception type={type(e).__name__}: {e}")
            raise LLMError(f"Ollama generate failed: {e}")
157
+
158
class OpenAICompatLLM(BaseLLM):
    """Backend for any OpenAI-compatible /chat/completions server."""

    def __init__(self, cfg: Settings):
        self.base_url = cfg.openai_compat_base_url.rstrip("/")
        self.model = cfg.openai_compat_model
        self.api_key = cfg.openai_compat_api_key
        self.timeout = cfg.openai_compat_timeout_s

    @retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=0.5, min=0.5, max=2))
    def generate_json(self, system_prompt: str, user_prompt: str) -> str:
        """Call the chat-completions endpoint and return the message content.

        Raises:
            LLMError: on timeout or any transport/HTTP/parse failure.
        """
        url = f"{self.base_url}/chat/completions"
        headers = {"Content-Type": "application/json"}
        # "not-needed" is the sentinel default for servers without auth.
        if self.api_key and self.api_key != "not-needed":
            headers["Authorization"] = f"Bearer {self.api_key}"
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            "temperature": 0.4,
            "top_p": 0.9,
            "max_tokens": 900,
            "response_format": {"type": "json_object"}
        }
        print(f"[DEBUG] OpenAI-compat request to {url} with model={self.model}")
        try:
            with httpx.Client(timeout=self.timeout) as client:
                r = client.post(url, headers=headers, json=payload)
                print(f"[DEBUG] OpenAI-compat response status: {r.status_code}")
                if r.status_code != 200:
                    # Surface the server's error body before raise_for_status hides it,
                    # matching the diagnostics of the Ollama/HuggingFace clients.
                    print(f"[DEBUG] OpenAI-compat error response: {r.text}")
                r.raise_for_status()
                data = r.json()
                return (data["choices"][0]["message"]["content"] or "").strip()
        except httpx.TimeoutException as e:
            # Report timeouts explicitly instead of a generic failure message.
            print(f"[DEBUG] OpenAI-compat timeout: {e}")
            raise LLMError(f"OpenAI-compat generate timed out after {self.timeout}s: {e}") from e
        except Exception as e:
            print(f"[DEBUG] OpenAI-compat exception type={type(e).__name__}: {e}")
            raise LLMError(f"OpenAI-compat generate failed: {e}") from e
190
+
191
class HuggingFaceLLM(BaseLLM):
    """HuggingFace LLM using router.huggingface.co (OpenAI-compatible API format)"""

    # The router exposes hosted models behind a single OpenAI-style endpoint.
    ROUTER_URL = "https://router.huggingface.co/v1/chat/completions"

    def __init__(self, cfg: Settings):
        self.model = cfg.hf_model
        self.api_key = cfg.hf_api_key
        self.timeout = 120  # seconds; serverless cold starts can be slow

    @retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=0.5, min=0.5, max=2))
    def generate_json(self, system_prompt: str, user_prompt: str) -> str:
        """Send a chat-completion request to the HF router and return the reply text.

        Raises:
            LLMError: on any transport/HTTP failure.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        print(f"[DEBUG] HuggingFace request to model: {self.model}")
        print(f"[DEBUG] API key present: {bool(self.api_key and self.api_key != 'your_huggingface_api_key_here')}")

        # OpenAI-compatible chat payload (accepted by the HF router).
        body = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            "max_tokens": 800,
            "temperature": 0.4
        }

        try:
            with httpx.Client(timeout=self.timeout) as client:
                r = client.post(self.ROUTER_URL, headers=headers, json=body)
                print(f"[DEBUG] HuggingFace response status: {r.status_code}")
                if r.status_code != 200:
                    print(f"[DEBUG] HuggingFace error response: {r.text}")
                r.raise_for_status()
                # OpenAI-compatible shape: choices[0].message.content
                data = r.json()
                if "choices" in data and len(data["choices"]) > 0:
                    return data["choices"][0]["message"]["content"].strip()
                return ""
        except Exception as e:
            print(f"[DEBUG] HuggingFace exception: {type(e).__name__}: {e}")
            raise LLMError(f"HuggingFace generate failed: {e}")
237
+
238
+ def get_llm(cfg: Settings) -> BaseLLM:
239
+ provider = (cfg.llm_provider or "").lower().strip()
240
+ if provider == "ollama":
241
+ return OllamaLLM(cfg)
242
+ if provider == "openai_compat":
243
+ return OpenAICompatLLM(cfg)
244
+ if provider == "huggingface":
245
+ return HuggingFaceLLM(cfg)
246
+ raise ValueError(f"Unsupported LLM_PROVIDER: {provider}")
247
+
248
+
249
+ # --- 5. Generation Logic ---
250
+
251
+ _JSON_RE = re.compile(r"\{.*\}", re.DOTALL)
252
+
253
+ def _safe_extract_json(text: str) -> dict | None:
254
+ if not text:
255
+ return None
256
+ text = text.strip()
257
+ try:
258
+ return json.loads(text)
259
+ except Exception:
260
+ pass
261
+ m = _JSON_RE.search(text)
262
+ if m:
263
+ try:
264
+ return json.loads(m.group(0))
265
+ except Exception:
266
+ return None
267
+ return None
268
+
269
def generate_annotations(
    selected_text: str,
    context_text: str | None = None,
    section_title: str | None = None,
    page_start: int | None = None,
    page_end: int | None = None,
    config: Settings | None = None
) -> List[Dict[str, Any]]:
    """
    Main entrypoint: Generate questions for selected text using LLM only.
    Returns empty list if generation fails.
    """
    cfg = config or settings

    # Build the backend and the prompt up front.
    llm = get_llm(cfg)
    user_prompt = build_question_prompt(selected_text, context_text, section_title, page_start, page_end)

    validated: List[Dict[str, Any]] = []
    try:
        raw = llm.generate_json(SYSTEM_PROMPT, user_prompt)
        parsed = _safe_extract_json(raw)

        candidates = parsed.get("questions") if isinstance(parsed, dict) else None
        if isinstance(candidates, list):
            for candidate in candidates:
                try:
                    # Pydantic validation; malformed entries are skipped.
                    validated.append(GeneratedQuestion(**candidate).model_dump())
                except Exception:
                    continue

        # Cap the number of questions returned.
        validated = validated[:cfg.max_output_questions]

    except Exception as e:
        print(f"LLM Generation failed: {e}")
        # LLM-only mode: no fallback generator — fail soft with an empty list.
        return []

    return validated
312
+
313
+
314
+ # --- 6. CLI Test ---
315
if __name__ == "__main__":
    # Smoke test: run the full generation path against a hard-coded excerpt.
    sample_text = "BRCA1 mutations significantly increase the risk of developing breast cancer."
    sample_context = "In this study of 500 patients, we observed that BRCA1 mutations significantly increase the risk of developing breast cancer compared to controls."

    print("Generating annotations...")
    annotations = generate_annotations(
        selected_text=sample_text,
        context_text=sample_context,
        section_title="Abstract",
    )
    print(json.dumps(annotations, indent=2))
hipaathesis.py CHANGED
@@ -122,7 +122,7 @@ except ImportError:
122
  OPENCV_AVAILABLE = False
123
 
124
  from questions import THESIS_QUESTIONS
125
- from pubtator_annotator import PubTatorAnnotator
126
 
127
  warnings.filterwarnings('ignore')
128
 
@@ -298,12 +298,13 @@ class SecureFileHandler:
298
  class HIPAACompliantThesisAnalyzer:
299
  """HIPAA-compliant version of the thesis analyzer"""
300
 
301
- def __init__(self, user_id=None, password=None, session_timeout=30, model_name="t5-small"):
302
  self.user_id = user_id or getpass.getuser()
303
  self.session_timeout = session_timeout # minutes
304
  self.session_start = datetime.now()
305
  self.last_activity = datetime.now()
306
  self.model_name = model_name
 
307
 
308
  # Map model names to their optimal tasks and parameters
309
  self.model_configs = {
@@ -332,6 +333,7 @@ class HIPAACompliantThesisAnalyzer:
332
  print(f"HIPAA-Compliant Thesis Analyzer initialized for user: {self.user_id}")
333
  print(f"Session timeout: {session_timeout} minutes")
334
  print(f"Encryption enabled: {'Yes' if password else 'No'}")
 
335
 
336
  def _initialize_analyzer(self):
337
  """Initialize the core analyzer components"""
@@ -1195,6 +1197,18 @@ class HIPAACompliantThesisAnalyzer:
1195
 
1196
  return answers
1197
 
 
 
 
 
 
 
 
 
 
 
 
 
1198
  def cleanup_session(self):
1199
  """Clean up session data securely"""
1200
  self.hipaa_logger.log_access(self.user_id, "SESSION_END", "THESIS_ANALYZER")
@@ -1270,28 +1284,6 @@ def get_answer(req: AnalyzeReq):
1270
  print(f"Error in get_answer: {e}")
1271
  return {"error": str(e)}
1272
 
1273
- @app.post('/get_annotations')
1274
- def get_annotations(req: AnalyzeReq):
1275
- """Get PubTator annotations only"""
1276
- try:
1277
- analyzer = HIPAACompliantThesisAnalyzer(
1278
- user_id=req.userId,
1279
- password=req.password,
1280
- session_timeout=30,
1281
- model_name=req.model_name
1282
- )
1283
-
1284
- report = analyzer.process_annotations_only(
1285
- pdf_path=req.storageKey,
1286
- output_file="hipaa_annotations_only"
1287
- )
1288
-
1289
- analyzer.cleanup_session()
1290
- return report
1291
- except Exception as e:
1292
- print(f"Error in get_annotations: {e}")
1293
- return {"error": str(e)}
1294
-
1295
  @app.post('/upload_db')
1296
  async def upload_db(upload_db: str = Form(...), pdf_file: UploadFile = File(...)):
1297
  """Read PDF, extract text & images + OCR, and save content to database"""
@@ -1583,7 +1575,6 @@ def download_pdf_from_url(document_url: str, verify_ssl: Optional[bool] = None)
1583
 
1584
  return response.content
1585
 
1586
-
1587
  @app.post('/extract_content')
1588
  async def extract_content(req: ExtractFromUrlRequest):
1589
  """
@@ -1638,7 +1629,8 @@ def analyze(req: AnalyzeReq):
1638
  user_id=req.userId,
1639
  password=req.password,
1640
  session_timeout=30,
1641
- model_name=req.model_name
 
1642
  )
1643
 
1644
  pdf_path = req.storageKey
@@ -1671,6 +1663,35 @@ def analyze(req: AnalyzeReq):
1671
  print(f"Error: {e}")
1672
  print("Ensure all requirements are installed and Tesseract is available.")
1673
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1674
  #if __name__ == "__main__":
1675
  print("""
1676
  HIPAA-COMPLIANT THESIS ANALYZER
 
122
  OPENCV_AVAILABLE = False
123
 
124
  from questions import THESIS_QUESTIONS
125
+ from biomed_annotator import generate_annotations
126
 
127
  warnings.filterwarnings('ignore')
128
 
 
298
  class HIPAACompliantThesisAnalyzer:
299
  """HIPAA-compliant version of the thesis analyzer"""
300
 
301
+ def __init__(self, user_id=None, password=None, session_timeout=30, model_name="t5-small", mode="analyze"):
302
  self.user_id = user_id or getpass.getuser()
303
  self.session_timeout = session_timeout # minutes
304
  self.session_start = datetime.now()
305
  self.last_activity = datetime.now()
306
  self.model_name = model_name
307
+ self.mode = mode
308
 
309
  # Map model names to their optimal tasks and parameters
310
  self.model_configs = {
 
333
  print(f"HIPAA-Compliant Thesis Analyzer initialized for user: {self.user_id}")
334
  print(f"Session timeout: {session_timeout} minutes")
335
  print(f"Encryption enabled: {'Yes' if password else 'No'}")
336
+ print(f"Mode: {self.mode}")
337
 
338
  def _initialize_analyzer(self):
339
  """Initialize the core analyzer components"""
 
1197
 
1198
  return answers
1199
 
1200
+ def get_annotation(self, sample_text, sample_context):
1201
+ """Generate annotations using biomed_annotator"""
1202
+ try:
1203
+ return generate_annotations(
1204
+ selected_text=sample_text,
1205
+ context_text=sample_context
1206
+ )
1207
+ except Exception as e:
1208
+ print(f"Error in get_annotation: {e}")
1209
+ self.hipaa_logger.log_access(self.user_id, "ANNOTATION_ERROR", "TEXT_SELECTION", success=False)
1210
+ return []
1211
+
1212
  def cleanup_session(self):
1213
  """Clean up session data securely"""
1214
  self.hipaa_logger.log_access(self.user_id, "SESSION_END", "THESIS_ANALYZER")
 
1284
  print(f"Error in get_answer: {e}")
1285
  return {"error": str(e)}
1286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1287
  @app.post('/upload_db')
1288
  async def upload_db(upload_db: str = Form(...), pdf_file: UploadFile = File(...)):
1289
  """Read PDF, extract text & images + OCR, and save content to database"""
 
1575
 
1576
  return response.content
1577
 
 
1578
  @app.post('/extract_content')
1579
  async def extract_content(req: ExtractFromUrlRequest):
1580
  """
 
1629
  user_id=req.userId,
1630
  password=req.password,
1631
  session_timeout=30,
1632
+ model_name=req.model_name,
1633
+ mode="analyze"
1634
  )
1635
 
1636
  pdf_path = req.storageKey
 
1663
  print(f"Error: {e}")
1664
  print("Ensure all requirements are installed and Tesseract is available.")
1665
 
1666
+
1667
class AnnotationReq(BaseModel):
    """Payload for /get_annotations: the selected text plus optional context."""
    userId: Optional[str] = None
    password: Optional[str] = None
    sample_text: str
    sample_context: Optional[str] = None


@app.post('/get_annotations')
def get_annotations_api(req: AnnotationReq):
    """Generate reading-question annotations for the selected text.

    Spins up an analyzer session in "annotations" mode, delegates to its
    get_annotation helper, and always tears the session down afterwards.
    Returns the annotation list, or {"error": ...} on failure.
    """
    try:
        analyzer = HIPAACompliantThesisAnalyzer(
            user_id=req.userId,
            password=req.password,
            mode="annotations"
        )
        annotations = analyzer.get_annotation(
            sample_text=req.sample_text,
            sample_context=req.sample_context
        )
        analyzer.cleanup_session()
        return annotations
    except Exception as e:
        print(f"Error in get_annotations: {e}")
        return {"error": str(e)}
1694
+
1695
  #if __name__ == "__main__":
1696
  print("""
1697
  HIPAA-COMPLIANT THESIS ANALYZER
requirements.txt CHANGED
@@ -1,21 +1,23 @@
1
  cryptography==46.0.1
2
- fastapi==0.118.0
3
  PyMuPDF==1.22.5
4
  nltk==3.9.1
5
- numpy<2.3.0,>=2
6
  opencv_python==4.12.0.88
7
- Pillow==11.3.0
8
  pydantic==2.11.9
9
  PyPDF2==3.0.1
10
- pytesseract==0.3.13
11
  requests==2.31.0
12
- torch==2.8.0
13
- transformers==4.56.1
14
  urllib3==2.2.0
15
- uvicorn
16
- scikit-learn==1.4.2
17
- rank-bm25==0.2.2
18
- sentence-transformers==2.7.0
19
  pymupdf==1.24.9
20
- textstat==0.7.4
21
- psycopg2-binary==2.9.10
 
 
 
1
  cryptography==46.0.1
2
+ fastapi==0.118.0
3
  PyMuPDF==1.22.5
4
  nltk==3.9.1
5
+ numpy<2.3.0,>=2
6
  opencv_python==4.12.0.88
7
+ Pillow==11.3.0
8
  pydantic==2.11.9
9
  PyPDF2==3.0.1
10
+ pytesseract==0.3.13
11
  requests==2.31.0
12
+ torch==2.8.0
13
+ transformers==4.56.1
14
  urllib3==2.2.0
15
+ uvicorn
16
+ scikit-learn==1.4.2
17
+ rank-bm25==0.2.2
18
+ sentence-transformers==2.7.0
19
  pymupdf==1.24.9
20
+ textstat==0.7.4
21
+ psycopg2-binary==2.9.10
22
+ httpx
23
+ tenacity
test_annotations_api.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
import json

# Manual smoke test for the /get_annotations endpoint of the local server.
url = "http://localhost:8000/get_annotations"
payload = {
    "sample_text": "cancer",
    "sample_context": "cancer is common in nowadays, its better to diagnosis in early stages. Recovery will be faster"
}

try:
    print(f"Sending request to {url}...")
    response = requests.post(url, json=payload, timeout=120)
    print(f"Status Code: {response.status_code}")
    print("Response Content:")
    try:
        print(json.dumps(response.json(), indent=2))
    except ValueError:
        # response.json() raises a ValueError subclass on non-JSON bodies;
        # a bare `except:` here would also swallow KeyboardInterrupt/SystemExit.
        print(response.text)
except Exception as e:
    print(f"Error: {e}")