formatting
Browse files
app.py
CHANGED
|
@@ -93,7 +93,6 @@ def get_embedding_dimensions(model_id):
|
|
| 93 |
return len(sample_embedding)
|
| 94 |
|
| 95 |
# ==================== QDRANT SETUP ====================
|
| 96 |
-
# Initialize Qdrant (in-memory)
|
| 97 |
qdrant_client = QdrantClient(":memory:")
|
| 98 |
|
| 99 |
# ==================== DOCUMENT PROCESSING ====================
|
|
@@ -496,7 +495,7 @@ def search_protocol_for_instruments(domain: str) -> dict:
|
|
| 496 |
return {"domain": domain, "instrument": "Error during identification", "context": str(e)}
|
| 497 |
|
| 498 |
@tool
|
| 499 |
-
def analyze_protocol_domains() -> str:
|
| 500 |
"""Analyze all NIH HEAL CDE core domains and identify instruments used in the protocol."""
|
| 501 |
# Check if protocol document exists
|
| 502 |
uploaded_files = [f for f in os.listdir(UPLOAD_PATH) if f.endswith('.pdf')]
|
|
@@ -504,23 +503,21 @@ def analyze_protocol_domains() -> str:
|
|
| 504 |
return "No protocol document has been uploaded yet."
|
| 505 |
|
| 506 |
# For each domain, search for relevant instruments
|
| 507 |
-
|
| 508 |
|
| 509 |
for domain in NIH_HEAL_CORE_DOMAINS:
|
| 510 |
# Use the search_protocol_for_instruments tool to get results for each domain
|
| 511 |
result = search_protocol_for_instruments(domain)
|
| 512 |
-
domain_instruments[domain] = result["instrument"]
|
| 513 |
print(f"Identified instrument for {domain}: {result['instrument']}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
|
| 515 |
-
#
|
| 516 |
-
|
| 517 |
-
result += "| Domain | Protocol Instrument |\n"
|
| 518 |
-
result += "|--------|--------------------|\n"
|
| 519 |
-
|
| 520 |
-
for domain, instrument in domain_instruments.items():
|
| 521 |
-
result += f"| {domain} | {instrument} |\n"
|
| 522 |
-
|
| 523 |
-
return result
|
| 524 |
|
| 525 |
@tool
|
| 526 |
def format_domain_analysis(analysis_results: list, title: str = "NIH HEAL CDE Core Domains Analysis") -> str:
|
|
@@ -531,14 +528,15 @@ def format_domain_analysis(analysis_results: list, title: str = "NIH HEAL CDE Co
|
|
| 531 |
|
| 532 |
Returns:
|
| 533 |
Markdown formatted table of domains and identified instruments
|
| 534 |
-
|
| 535 |
-
|
| 536 |
"""
|
| 537 |
-
|
|
|
|
|
|
|
|
|
|
| 538 |
# Format the results as a markdown table
|
| 539 |
result = f"# {title}\n\n"
|
| 540 |
-
result += "| Domain | Protocol Instrument |\n"
|
| 541 |
-
result += "|--------|--------------------|\n"
|
| 542 |
|
| 543 |
for item in analysis_results:
|
| 544 |
domain = item.get("domain", "Unknown")
|
|
|
|
| 93 |
return len(sample_embedding)
|
| 94 |
|
| 95 |
# ==================== QDRANT SETUP ====================
|
|
|
|
| 96 |
qdrant_client = QdrantClient(":memory:")
|
| 97 |
|
| 98 |
# ==================== DOCUMENT PROCESSING ====================
|
|
|
|
| 495 |
return {"domain": domain, "instrument": "Error during identification", "context": str(e)}
|
| 496 |
|
| 497 |
@tool
|
| 498 |
+
def analyze_protocol_domains() -> list:
|
| 499 |
"""Analyze all NIH HEAL CDE core domains and identify instruments used in the protocol."""
|
| 500 |
# Check if protocol document exists
|
| 501 |
uploaded_files = [f for f in os.listdir(UPLOAD_PATH) if f.endswith('.pdf')]
|
|
|
|
| 503 |
return "No protocol document has been uploaded yet."
|
| 504 |
|
| 505 |
# For each domain, search for relevant instruments
|
| 506 |
+
domain_analysis_results = []
|
| 507 |
|
| 508 |
for domain in NIH_HEAL_CORE_DOMAINS:
|
| 509 |
# Use the search_protocol_for_instruments tool to get results for each domain
|
| 510 |
result = search_protocol_for_instruments(domain)
|
|
|
|
| 511 |
print(f"Identified instrument for {domain}: {result['instrument']}")
|
| 512 |
+
|
| 513 |
+
# Add the result to our list of analysis results
|
| 514 |
+
domain_analysis_results.append({
|
| 515 |
+
"domain": domain,
|
| 516 |
+
"instrument": result["instrument"]
|
| 517 |
+
})
|
| 518 |
|
| 519 |
+
# Return the raw analysis results instead of formatting them
|
| 520 |
+
return domain_analysis_results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 521 |
|
| 522 |
@tool
|
| 523 |
def format_domain_analysis(analysis_results: list, title: str = "NIH HEAL CDE Core Domains Analysis") -> str:
|
|
|
|
| 528 |
|
| 529 |
Returns:
|
| 530 |
Markdown formatted table of domains and identified instruments
|
|
|
|
|
|
|
| 531 |
"""
|
| 532 |
+
# Get the name of the uploaded protocol file
|
| 533 |
+
uploaded_files = [f for f in os.listdir(UPLOAD_PATH) if f.endswith('.pdf')]
|
| 534 |
+
protocol_name = uploaded_files[0] if uploaded_files else "Unknown Protocol"
|
| 535 |
+
|
| 536 |
# Format the results as a markdown table
|
| 537 |
result = f"# {title}\n\n"
|
| 538 |
+
result += f"| Domain | Protocol Instrument - {protocol_name} |\n"
|
| 539 |
+
result += "|--------|" + "-" * (len(protocol_name) + 23) + "|\n"
|
| 540 |
|
| 541 |
for item in analysis_results:
|
| 542 |
domain = item.get("domain", "Unknown")
|