Update app.py
Browse files
app.py
CHANGED
|
@@ -3,22 +3,30 @@ import os
|
|
| 3 |
import json
|
| 4 |
import logging
|
| 5 |
import re
|
| 6 |
-
from typing import Dict, Any
|
| 7 |
from pathlib import Path
|
| 8 |
-
from unstructured.partition.pdf import partition_pdf
|
| 9 |
from flask import Flask, request, jsonify
|
| 10 |
from flask_cors import CORS
|
| 11 |
from dotenv import load_dotenv
|
| 12 |
-
from flask import send_from_directory, abort
|
| 13 |
-
from bloatectomy import bloatectomy
|
| 14 |
from werkzeug.utils import secure_filename
|
| 15 |
from langchain_groq import ChatGroq
|
| 16 |
-
from typing_extensions import TypedDict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
# --- Logging ---
|
| 19 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
| 20 |
-
logger = logging.getLogger("
|
| 21 |
-
|
| 22 |
# --- Load environment ---
|
| 23 |
load_dotenv()
|
| 24 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
|
@@ -28,98 +36,51 @@ if not GROQ_API_KEY:
|
|
| 28 |
|
| 29 |
# --- Flask app setup ---
|
| 30 |
BASE_DIR = Path(__file__).resolve().parent
|
| 31 |
-
REPORTS_ROOT = Path(os.getenv("REPORTS_ROOT", str(BASE_DIR / "reports")))
|
| 32 |
static_folder = BASE_DIR / "static"
|
| 33 |
|
| 34 |
app = Flask(__name__, static_folder=str(static_folder), static_url_path="/static")
|
| 35 |
CORS(app)
|
| 36 |
|
| 37 |
-
# Ensure the reports directory exists
|
| 38 |
-
os.makedirs(REPORTS_ROOT, exist_ok=True)
|
| 39 |
-
|
| 40 |
# --- LLM setup ---
|
|
|
|
| 41 |
llm = ChatGroq(
|
| 42 |
-
model=os.getenv("LLM_MODEL", "
|
| 43 |
-
temperature=0.
|
| 44 |
-
max_tokens=
|
| 45 |
api_key=GROQ_API_KEY,
|
| 46 |
)
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
b = bloatectomy(text, style=style, output="html")
|
| 51 |
-
tokens = getattr(b, "tokens", None)
|
| 52 |
-
if not tokens:
|
| 53 |
-
return text
|
| 54 |
-
return "\n".join(tokens)
|
| 55 |
-
except Exception:
|
| 56 |
-
logger.exception("Bloatectomy cleaning failed; returning original text")
|
| 57 |
-
return text
|
| 58 |
-
|
| 59 |
-
PATIENT_ASSISTANT_PROMPT = """
|
| 60 |
-
You are a helpful medical assistant acting as a doctor. You respond naturally to greetings and general medical questions without asking for patient ID unless the user requests information about prior medical records
|
| 61 |
|
| 62 |
Behavior rules (follow these strictly):
|
| 63 |
-
-
|
| 64 |
-
-
|
| 65 |
-
-
|
| 66 |
-
-
|
| 67 |
-
-
|
| 68 |
-
- When analyzing medical reports, trust the patient ID from the folder or query context as the source of truth.
|
| 69 |
-
- **If the report text mentions a different patient ID or name, do not refuse to answer but mention the discrepancy politely and proceed to answer based on the available data.**
|
| 70 |
-
- **Always protect patient privacy and avoid sharing information from reports not matching the current PID unless explicitly requested and with a clear disclaimer.**
|
| 71 |
|
| 72 |
STRICT OUTPUT FORMAT (JSON ONLY):
|
| 73 |
Return a single JSON object with the following keys:
|
| 74 |
- assistant_reply: string // a natural language reply to the user (short, helpful, always present)
|
| 75 |
-
-
|
| 76 |
-
-
|
| 77 |
|
| 78 |
Rules:
|
| 79 |
- ALWAYS include `assistant_reply` as a non-empty string.
|
| 80 |
- Do NOT produce any text outside the JSON object.
|
| 81 |
-
- Be concise in `assistant_reply`
|
| 82 |
-
- Do not make up information
|
| 83 |
"""
|
| 84 |
|
| 85 |
-
# Explicit patient-ID mention: "pid", "patient id" or a lone "p", an optional
# ":", "#" or "-" separator, then the ID itself (digits with an optional
# leading "p"), captured as group 1.
PID_PATTERN = re.compile(
    r"(?:\bpid\b|\bpatient\s*id\b|\bp\b)\s*[:#\-]?\s*(p?\d+)",
    re.IGNORECASE,
)

# Fallback: any standalone run of three or more digits (optional leading "p").
DIGIT_PATTERN = re.compile(r"\b(p?\d{3,})\b")

# Words suggesting that the user is talking about stored medical records.
RECORD_KEYWORDS = [
    "report",
    "lab",
    "result",
    "results",
    "previous",
    "history",
    "record",
    "records",
    "test",
    "tests",
    "scan",
    "imaging",
    "radiology",
    "thyroid",
    "tsh",
    "t3",
    "t4",
    "prescription",
    "doctor",
    "referral",
    "visit",
    "consultation",
]
|
| 93 |
-
|
| 94 |
-
def extract_pid_from_text(text: str) -> str | None:
    """Return the patient ID mentioned in *text*, or None if there is none.

    An explicit mention matched by ``PID_PATTERN`` wins. Otherwise, when the
    text contains any of ``RECORD_KEYWORDS`` (substring match on the
    lower-cased text), the first match of ``DIGIT_PATTERN`` is used. Any
    leading "p"/"P" characters are stripped from the returned ID.
    """
    if not text:
        return None

    explicit = PID_PATTERN.search(text)
    if explicit is not None:
        return explicit.group(1).lstrip('pP')

    lowered = text.lower()
    mentions_records = False
    for keyword in RECORD_KEYWORDS:
        if keyword in lowered:
            mentions_records = True
            break
    if not mentions_records:
        return None

    fallback = DIGIT_PATTERN.search(text)
    return fallback.group(1).lstrip('pP') if fallback is not None else None
|
| 105 |
-
|
| 106 |
-
def needs_pid_for_query(text: str) -> bool:
    """Tell whether *text* appears to ask about stored patient records.

    Returns True when the lower-cased text contains one of the fixed
    record phrases below or any entry of ``RECORD_KEYWORDS`` as a substring;
    False for empty input or when nothing matches.
    """
    if not text:
        return False

    lowered = text.lower()
    record_phrases = (
        "previous report",
        "previous lab",
        "my report",
        "my records",
        "past report",
        "last report",
        "previous test",
        "previous results",
    )
    # Phrases and single keywords are checked the same way, so test them together.
    triggers = (*record_phrases, *RECORD_KEYWORDS)
    return any(trigger in lowered for trigger in triggers)
|
| 116 |
-
|
| 117 |
def extract_json_from_llm_response(raw_response: str) -> dict:
|
|
|
|
| 118 |
default = {
|
| 119 |
"assistant_reply": "I'm sorry — I couldn't understand that. Could you please rephrase?",
|
| 120 |
-
"
|
| 121 |
-
"
|
| 122 |
}
|
|
|
|
| 123 |
if not raw_response or not isinstance(raw_response, str):
|
| 124 |
return default
|
| 125 |
m = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
|
|
@@ -139,62 +100,26 @@ def extract_json_from_llm_response(raw_response: str) -> dict:
|
|
| 139 |
except Exception as e:
|
| 140 |
logger.warning("Failed to parse JSON from LLM output: %s", e)
|
| 141 |
return default
|
|
|
|
|
|
|
| 142 |
if isinstance(parsed, dict) and "assistant_reply" in parsed and isinstance(parsed["assistant_reply"], str) and parsed["assistant_reply"].strip():
|
| 143 |
-
parsed.setdefault("
|
| 144 |
-
parsed.setdefault("
|
| 145 |
return parsed
|
| 146 |
else:
|
| 147 |
logger.warning("Parsed JSON missing 'assistant_reply' or invalid format. Returning default.")
|
| 148 |
return default
|
| 149 |
|
| 150 |
-
def _parse_patient_details(content) -> dict:
    """Pull the patient-details JSON object out of raw LLM output.

    Accepts output wrapped in a Markdown code fence or surrounded by prose.
    Returns the object under ``patientDetails`` when present, otherwise the
    top-level object; returns ``{}`` when nothing usable can be parsed.
    """
    if not isinstance(content, str) or not content.strip():
        return {}

    fenced = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", content)
    candidate = fenced.group(1) if fenced else content

    # Keep only the outermost {...} so leading/trailing chatter is ignored.
    start = candidate.find("{")
    end = candidate.rfind("}")
    if start == -1 or end <= start:
        return {}

    try:
        parsed = json.loads(candidate[start:end + 1])
    except ValueError as e:
        logger.warning("Failed to parse patient details JSON: %s", e)
        return {}

    if not isinstance(parsed, dict):
        return {}
    details = parsed.get("patientDetails", parsed)  # support both shapes
    return details if isinstance(details, dict) else {}


def extract_details_from_user_message(user_message: str) -> dict:
    """Use the LLM to extract patient details from the user's last message.

    The model is asked for a JSON object with the keys ``name``, ``contact``,
    ``city`` and ``problem`` (absent fields omitted).

    The reply is parsed here rather than through
    ``extract_json_from_llm_response``: that helper substitutes its default
    payload whenever ``assistant_reply`` is missing, which is always the case
    for this prompt, so every extracted field would be thrown away.

    Args:
        user_message: The raw text of the user's message.

    Returns:
        A dict with whichever fields were found; ``{}`` if the LLM call
        fails or its output cannot be parsed.
    """
    extraction_prompt = f"""
Extract any patient details from the following user message. Return a JSON object with keys name, contact, city, problem.
If a field is not present, omit it.

User message:
\"\"\"{user_message}\"\"\"
"""
    messages = [
        {"role": "system", "content": "You are a helpful assistant that extracts patient details from user messages."},
        {"role": "user", "content": extraction_prompt}
    ]
    try:
        response = llm.invoke(messages)
        content = response.content if hasattr(response, "content") else str(response)
    except Exception as e:
        # Best effort: a failed extraction must not break the chat flow.
        logger.warning(f"Detail extraction failed: {e}")
        return {}
    return _parse_patient_details(content)
|
| 174 |
-
|
| 175 |
# --- Flask routes ---
|
| 176 |
@app.route("/", methods=["GET"])
|
| 177 |
def serve_frontend():
|
| 178 |
try:
|
| 179 |
-
|
|
|
|
| 180 |
except Exception:
|
| 181 |
return "<h3>frontend.html not found in static/ — please add your frontend.html there.</h3>", 404
|
| 182 |
|
| 183 |
-
|
| 184 |
-
def upload_report():
    """Save one uploaded report file into the patient's folder.

    Reads the file from the multipart field ``report`` and the patient ID
    from the form field ``patient_id``. The file is stored under
    ``REPORTS_ROOT / <patient_id> / <sanitised filename>``.

    Returns:
        A JSON message with status 200 on success, or a JSON error with
        status 400 when the file, the patient ID or the file name is missing
        or unsafe.
    """
    if 'report' not in request.files:
        return jsonify({"error": "No file part in the request"}), 400

    upload = request.files['report']
    patient_id = request.form.get("patient_id")
    if not upload.filename or not patient_id:
        return jsonify({"error": "No selected file or patient ID"}), 400

    # The patient ID becomes a directory name, so it must not contain path
    # separators or ".." components that could escape REPORTS_ROOT.
    if secure_filename(patient_id) != patient_id:
        return jsonify({"error": "Invalid patient ID"}), 400

    # secure_filename() returns "" for names made only of unsafe characters.
    filename = secure_filename(upload.filename)
    if not filename:
        return jsonify({"error": "Invalid file name"}), 400

    patient_folder = REPORTS_ROOT / f"{patient_id}"
    os.makedirs(patient_folder, exist_ok=True)
    upload.save(patient_folder / filename)
    return jsonify({"message": f"File '{filename}' uploaded successfully for patient ID '{patient_id}'."}), 200
|
| 198 |
|
| 199 |
@app.route("/chat", methods=["POST"])
|
| 200 |
def chat():
|
|
@@ -202,291 +127,177 @@ def chat():
|
|
| 202 |
if not isinstance(data, dict):
|
| 203 |
return jsonify({"error": "invalid request body"}), 400
|
| 204 |
|
| 205 |
-
chat_history = data.get("chat_history") or []
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
state =
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
}
|
| 240 |
-
return jsonify(response_payload)
|
| 241 |
-
else:
|
| 242 |
-
assistant_reply = (
|
| 243 |
-
"I still need your Patient ID (PID) to access your records. "
|
| 244 |
-
"If you prefer, I can help with general medical questions instead."
|
| 245 |
-
)
|
| 246 |
-
response_payload = {
|
| 247 |
-
"assistant_reply": assistant_reply,
|
| 248 |
-
"updated_state": state,
|
| 249 |
-
}
|
| 250 |
-
return jsonify(response_payload)
|
| 251 |
-
|
| 252 |
-
# If we have a PID, check whether any allowed files exist for that PID.
|
| 253 |
-
has_allowed_files = False
|
| 254 |
-
if patient_id and str(patient_id).strip() != "":
|
| 255 |
-
patient_folder = REPORTS_ROOT / f"{patient_id}"
|
| 256 |
-
if patient_folder.exists() and patient_folder.is_dir():
|
| 257 |
-
for f in patient_folder.iterdir():
|
| 258 |
-
if f.is_file():
|
| 259 |
-
ext = f.suffix.lower().lstrip(".")
|
| 260 |
-
if ext in ALLOWED_EXTENSIONS:
|
| 261 |
-
has_allowed_files = True
|
| 262 |
-
break
|
| 263 |
-
|
| 264 |
-
# IMPORTANT: do NOT short-circuit here.
|
| 265 |
-
# If the user explicitly asked for previous records (wants_records == True)
|
| 266 |
-
# and we have no files, we will tell the LLM that there are no uploaded records
|
| 267 |
-
# via the SYSTEM_HINT (so LLM can respond appropriately). We DO NOT return early,
|
| 268 |
-
# and we DO NOT add any extra JSON fields to the response.
|
| 269 |
-
if has_allowed_files:
|
| 270 |
-
# read files and build combined_text_parts (existing behavior)
|
| 271 |
-
for fname in sorted(os.listdir(patient_folder)):
|
| 272 |
-
file_path = patient_folder / fname
|
| 273 |
-
page_text = ""
|
| 274 |
-
if partition_pdf is not None and str(file_path).lower().endswith('.pdf'):
|
| 275 |
-
try:
|
| 276 |
-
elements = partition_pdf(filename=str(file_path))
|
| 277 |
-
page_text = "\n".join([el.text for el in elements if hasattr(el, 'text') and el.text])
|
| 278 |
-
except Exception:
|
| 279 |
-
logger.exception("Failed to parse PDF %s", file_path)
|
| 280 |
-
else:
|
| 281 |
-
try:
|
| 282 |
-
page_text = file_path.read_text(encoding='utf-8', errors='ignore')
|
| 283 |
-
except Exception:
|
| 284 |
-
page_text = ""
|
| 285 |
-
|
| 286 |
-
if page_text:
|
| 287 |
-
cleaned = clean_notes_with_bloatectomy(page_text, style="remov")
|
| 288 |
-
if cleaned:
|
| 289 |
-
combined_text_parts.append(cleaned)
|
| 290 |
-
else:
|
| 291 |
-
# no files: do not modify state or return. We'll include a hint for the LLM below
|
| 292 |
-
logger.info("No uploaded files found for PID %s. Will inform LLM only if user asked for records.", patient_id)
|
| 293 |
-
|
| 294 |
-
# Build conversationSummary from any docs we read (unchanged)
|
| 295 |
-
base_summary = state.get("conversationSummary", "") or ""
|
| 296 |
-
docs_summary = "\n\n".join(combined_text_parts)
|
| 297 |
-
if docs_summary:
|
| 298 |
-
state["conversationSummary"] = (base_summary + "\n\n" + docs_summary).strip()
|
| 299 |
-
else:
|
| 300 |
-
state["conversationSummary"] = base_summary
|
| 301 |
-
|
| 302 |
-
# Prepare the action hint. If user asked for records but there are no uploaded files,
|
| 303 |
-
# explicitly tell the LLM so it can respond like "No records available for PID X".
|
| 304 |
-
if patient_id and str(patient_id).strip() != "":
|
| 305 |
-
if wants_records and not has_allowed_files:
|
| 306 |
-
action_hint = (
|
| 307 |
-
f"User asked about prior records. NOTE: there are NO uploaded medical records for patient ID {patient_id}."
|
| 308 |
-
)
|
| 309 |
-
else:
|
| 310 |
-
action_hint = f"Use the patient ID {patient_id} to retrieve and summarize any relevant reports."
|
| 311 |
else:
|
| 312 |
-
action_hint = "
|
| 313 |
|
| 314 |
user_prompt = f"""
|
| 315 |
-
Current
|
| 316 |
-
|
| 317 |
-
Last user message: {state.get("lastUserMessage", "")}
|
| 318 |
|
| 319 |
SYSTEM_HINT: {action_hint}
|
| 320 |
|
| 321 |
-
Return ONLY valid JSON with keys: assistant_reply,
|
| 322 |
"""
|
| 323 |
|
| 324 |
messages = [
|
| 325 |
-
{"role": "system", "content":
|
| 326 |
{"role": "user", "content": user_prompt}
|
| 327 |
]
|
| 328 |
|
| 329 |
try:
|
| 330 |
-
logger.info("Invoking LLM
|
| 331 |
llm_response = llm.invoke(messages)
|
| 332 |
-
raw_response = ""
|
| 333 |
-
if hasattr(llm_response, "content"):
|
| 334 |
-
raw_response = llm_response.content
|
| 335 |
-
else:
|
| 336 |
-
raw_response = str(llm_response)
|
| 337 |
|
| 338 |
-
logger.info(f"Raw LLM response: {raw_response}")
|
| 339 |
parsed_result = extract_json_from_llm_response(raw_response)
|
| 340 |
|
| 341 |
except Exception as e:
|
| 342 |
logger.exception("LLM invocation failed")
|
| 343 |
return jsonify({"error": "LLM invocation failed", "detail": str(e)}), 500
|
| 344 |
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
state
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
last_msg_lower = state.get("lastUserMessage", "").lower()
|
| 356 |
-
conversation_summary_lower = state.get("conversationSummary", "").lower()
|
| 357 |
-
|
| 358 |
-
wants_to_book = any(kw in last_msg_lower for kw in booking_intent_keywords) or \
|
| 359 |
-
any(kw in conversation_summary_lower for kw in booking_intent_keywords)
|
| 360 |
-
|
| 361 |
-
if wants_to_book:
|
| 362 |
-
# Extract details from last user message
|
| 363 |
-
extracted_details = extract_details_from_user_message(state.get("lastUserMessage", ""))
|
| 364 |
-
patient_details = state.setdefault("patientDetails", {})
|
| 365 |
-
# Update patientDetails with any newly extracted info
|
| 366 |
-
for key in REQUIRED_DETAILS:
|
| 367 |
-
if key in extracted_details and extracted_details[key]:
|
| 368 |
-
patient_details[key] = extracted_details[key]
|
| 369 |
-
|
| 370 |
-
missing_fields = [field for field in REQUIRED_DETAILS if not patient_details.get(field)]
|
| 371 |
-
if missing_fields:
|
| 372 |
-
missing_field = missing_fields[0]
|
| 373 |
-
field_prompts = {
|
| 374 |
-
"name": "Could you please provide your full name?",
|
| 375 |
-
"contact": "May I have your contact number?",
|
| 376 |
-
"city": "What city are you located in?",
|
| 377 |
-
"problem": "Please briefly describe your medical problem or reason for the appointment.",
|
| 378 |
-
}
|
| 379 |
-
assistant_reply = field_prompts.get(missing_field, f"Please provide your {missing_field}.")
|
| 380 |
-
response_payload = {
|
| 381 |
-
"assistant_reply": assistant_reply,
|
| 382 |
-
"updated_state": state,
|
| 383 |
-
}
|
| 384 |
-
return jsonify(response_payload)
|
| 385 |
-
|
| 386 |
-
assistant_reply = updated_state.get("assistant_reply")
|
| 387 |
if not assistant_reply or not isinstance(assistant_reply, str) or not assistant_reply.strip():
|
| 388 |
-
assistant_reply = "I'm here to help
|
| 389 |
|
|
|
|
| 390 |
response_payload = {
|
| 391 |
"assistant_reply": assistant_reply,
|
| 392 |
"updated_state": state,
|
|
|
|
| 393 |
}
|
| 394 |
|
| 395 |
return jsonify(response_payload)
|
| 396 |
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
uploaded_files = request.files.getlist("files")
|
| 405 |
-
if not uploaded_files:
|
| 406 |
-
single = request.files.get("file")
|
| 407 |
-
if single:
|
| 408 |
-
uploaded_files = [single]
|
| 409 |
-
|
| 410 |
-
if not uploaded_files:
|
| 411 |
-
return jsonify({"error": "no files uploaded (use form field 'files')"}), 400
|
| 412 |
-
|
| 413 |
-
patient_folder = REPORTS_ROOT / str(patient_id)
|
| 414 |
-
patient_folder.mkdir(parents=True, exist_ok=True)
|
| 415 |
-
|
| 416 |
-
saved = []
|
| 417 |
-
skipped = []
|
| 418 |
-
|
| 419 |
-
for file_storage in uploaded_files:
|
| 420 |
-
orig_name = getattr(file_storage, "filename", "") or ""
|
| 421 |
-
filename = secure_filename(orig_name)
|
| 422 |
-
if not filename:
|
| 423 |
-
skipped.append({"filename": orig_name, "reason": "invalid filename"})
|
| 424 |
-
continue
|
| 425 |
-
|
| 426 |
-
ext = filename.rsplit(".", 1)[1].lower() if "." in filename else ""
|
| 427 |
-
if ext not in ALLOWED_EXTENSIONS:
|
| 428 |
-
skipped.append({"filename": filename, "reason": f"extension '{ext}' not allowed"})
|
| 429 |
-
continue
|
| 430 |
-
|
| 431 |
-
dest = patient_folder / filename
|
| 432 |
-
if dest.exists():
|
| 433 |
-
base, dot, extension = filename.rpartition(".")
|
| 434 |
-
base = base or filename
|
| 435 |
-
i = 1
|
| 436 |
-
while True:
|
| 437 |
-
candidate = f"{base}__{i}.{extension}" if extension else f"{base}__{i}"
|
| 438 |
-
dest = patient_folder / candidate
|
| 439 |
-
if not dest.exists():
|
| 440 |
-
filename = candidate
|
| 441 |
-
break
|
| 442 |
-
i += 1
|
| 443 |
-
|
| 444 |
-
try:
|
| 445 |
-
file_storage.save(str(dest))
|
| 446 |
-
saved.append(filename)
|
| 447 |
-
except Exception as e:
|
| 448 |
-
logger.exception("Failed to save uploaded file %s: %s", filename, e)
|
| 449 |
-
skipped.append({"filename": filename, "reason": f"save failed: {e}"})
|
| 450 |
-
|
| 451 |
-
return jsonify({
|
| 452 |
-
"patient_id": str(patient_id),
|
| 453 |
-
"saved": saved,
|
| 454 |
-
"skipped": skipped,
|
| 455 |
-
"patient_folder": str(patient_folder)
|
| 456 |
-
}), 200
|
| 457 |
-
|
| 458 |
-
except Exception as exc:
|
| 459 |
-
logger.exception("Upload failed: %s", exc)
|
| 460 |
-
return jsonify({"error": "upload failed", "detail": str(exc)}), 500
|
| 461 |
-
|
| 462 |
-
@app.route("/<patient_id>/<filename>")
|
| 463 |
-
def serve_report(patient_id, filename):
|
| 464 |
-
"""
|
| 465 |
-
Serve a specific uploaded PDF (or other allowed file) for a patient.
|
| 466 |
-
URL format: /<patient_id>/<filename>
|
| 467 |
-
Example: /p14562/report1.pdf
|
| 468 |
-
"""
|
| 469 |
-
try:
|
| 470 |
-
patient_folder = REPORTS_ROOT / str(patient_id)
|
| 471 |
-
|
| 472 |
-
if not patient_folder.exists():
|
| 473 |
-
abort(404, description=f"Patient folder not found: {patient_id}")
|
| 474 |
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 479 |
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
)
|
| 485 |
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
|
|
|
| 489 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
|
| 491 |
@app.route("/ping", methods=["GET"])
|
| 492 |
def ping():
|
|
@@ -494,4 +305,4 @@ def ping():
|
|
| 494 |
|
| 495 |
if __name__ == "__main__":
    port = int(os.getenv("PORT", 7860))
    # The Werkzeug debugger allows arbitrary code execution from the browser,
    # so it must not be always-on for a server bound to every interface.
    # Opt in explicitly with FLASK_DEBUG=1 during local development.
    debug = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=port, debug=debug)
|
|
|
|
| 3 |
import json
|
| 4 |
import logging
|
| 5 |
import re
|
| 6 |
+
from typing import Dict, Any, List
|
| 7 |
from pathlib import Path
|
|
|
|
| 8 |
from flask import Flask, request, jsonify
|
| 9 |
from flask_cors import CORS
|
| 10 |
from dotenv import load_dotenv
|
|
|
|
|
|
|
| 11 |
from werkzeug.utils import secure_filename
|
| 12 |
from langchain_groq import ChatGroq
|
| 13 |
+
from typing_extensions import TypedDict
|
| 14 |
+
|
| 15 |
+
# --- Type Definitions for State Management ---
|
| 16 |
+
class TaggedReply(TypedDict):
    """A saved assistant reply together with the tags it was filed under."""

    # Text of the assistant reply that was bookmarked.
    reply: str
    # Labels attached to the reply.
    tags: List[str]
|
| 19 |
+
|
| 20 |
+
class AssistantState(TypedDict):
    """Conversation state exchanged with the client on each request."""

    # Running summary of the conversation.
    conversationSummary: str
    # Most recent message written by the user.
    lastUserMessage: str
    # Programming language the assistant should tailor its answers to.
    language: str
    # Replies the user has saved/bookmarked, each with its tags.
    taggedReplies: List[TaggedReply]
|
| 25 |
|
| 26 |
# --- Logging ---
|
| 27 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
| 28 |
+
logger = logging.getLogger("code-assistant")
|
| 29 |
+
|
| 30 |
# --- Load environment ---
|
| 31 |
load_dotenv()
|
| 32 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
|
|
|
| 36 |
|
| 37 |
# --- Flask app setup ---
|
| 38 |
BASE_DIR = Path(__file__).resolve().parent
|
|
|
|
| 39 |
static_folder = BASE_DIR / "static"
|
| 40 |
|
| 41 |
app = Flask(__name__, static_folder=str(static_folder), static_url_path="/static")
|
| 42 |
CORS(app)
|
| 43 |
|
|
|
|
|
|
|
|
|
|
| 44 |
# --- LLM setup ---
|
| 45 |
+
# Using a model that's good for coding tasks
|
| 46 |
llm = ChatGroq(
|
| 47 |
+
model=os.getenv("LLM_MODEL", "mixtral-8x7b-32768"), # Changed to a coding-friendly model
|
| 48 |
+
temperature=0.1, # Slightly less creative than general chat
|
| 49 |
+
max_tokens=2048, # Increased token limit for code
|
| 50 |
api_key=GROQ_API_KEY,
|
| 51 |
)
|
| 52 |
|
| 53 |
+
PROGRAMMING_ASSISTANT_PROMPT = """
|
| 54 |
+
You are an expert programming assistant. Your role is to provide code suggestions, fix bugs, explain programming concepts, and offer contextual help based on the user's query and preferred programming language.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
Behavior rules (follow these strictly):
|
| 57 |
+
- Contextual Help: Always aim to provide the most helpful, clear, and accurate information.
|
| 58 |
+
- Code Suggestions: When suggesting code, always enclose it in appropriate markdown code blocks (e.g., ```python\n...\n```).
|
| 59 |
+
- Error Explanation: When an error is provided, explain the root cause and provide a corrected code snippet if possible.
|
| 60 |
+
- Conceptual Questions: For questions like "What is a loop?", provide a clear, concise explanation with a simple, illustrative code example in the user's current language (if known, otherwise Python/JavaScript).
|
| 61 |
+
- Language Adaptation: Adjust your suggestions, code, and explanations to the programming language specified in the 'language' field of the 'AssistantState'. If 'language' is not set, ask the user what language they are working in.
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
STRICT OUTPUT FORMAT (JSON ONLY):
|
| 64 |
Return a single JSON object with the following keys:
|
| 65 |
- assistant_reply: string // a natural language reply to the user (short, helpful, always present)
|
| 66 |
+
- state_updates: object // updates to the internal state, keys may include: language, conversationSummary
|
| 67 |
+
- suggested_tags: array of strings // a list of 1-3 relevant tags for the assistant_reply (e.g., "Python", "Debugging", "Loop Concept")
|
| 68 |
|
| 69 |
Rules:
|
| 70 |
- ALWAYS include `assistant_reply` as a non-empty string.
|
| 71 |
- Do NOT produce any text outside the JSON object.
|
| 72 |
+
- Be concise in `assistant_reply`, but ensure the information is complete.
|
| 73 |
+
- Do not make up information.
|
| 74 |
"""
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
def extract_json_from_llm_response(raw_response: str) -> dict:
|
| 77 |
+
# Fallback payload returned whenever the LLM output is missing, unparsable or invalid.
|
| 78 |
default = {
|
| 79 |
"assistant_reply": "I'm sorry — I couldn't understand that. Could you please rephrase?",
|
| 80 |
+
"state_updates": {},
|
| 81 |
+
"suggested_tags": [],
|
| 82 |
}
|
| 83 |
+
# Empty or non-string output cannot be parsed; return the fallback immediately.
|
| 84 |
if not raw_response or not isinstance(raw_response, str):
|
| 85 |
return default
|
| 86 |
m = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
|
|
|
|
| 100 |
except Exception as e:
|
| 101 |
logger.warning("Failed to parse JSON from LLM output: %s", e)
|
| 102 |
return default
|
| 103 |
+
|
| 104 |
+
# Validation for new keys
|
| 105 |
if isinstance(parsed, dict) and "assistant_reply" in parsed and isinstance(parsed["assistant_reply"], str) and parsed["assistant_reply"].strip():
|
| 106 |
+
parsed.setdefault("state_updates", {})
|
| 107 |
+
parsed.setdefault("suggested_tags", [])
|
| 108 |
return parsed
|
| 109 |
else:
|
| 110 |
logger.warning("Parsed JSON missing 'assistant_reply' or invalid format. Returning default.")
|
| 111 |
return default
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
# --- Flask routes ---
|
| 114 |
@app.route("/", methods=["GET"])
def serve_frontend():
    """Serve ``frontend.html`` from the static folder.

    Falls back to a short HTML hint with status 404 when the file cannot be
    served.
    """
    try:
        page = app.send_static_file("frontend.html")
    except Exception:
        missing_page_hint = "<h3>frontend.html not found in static/ — please add your frontend.html there.</h3>"
        return missing_page_hint, 404
    else:
        return page
|
| 121 |
|
| 122 |
+
# UPLOAD routes are removed as they are no longer needed.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
# Languages the keyword detector recognises in the user's message.
_KNOWN_LANGUAGES = (
    "python", "javascript", "java", "c++", "c#", "go", "ruby", "php", "typescript", "swift",
)

# Matches "in|using|for <language>" in lower-cased text. Names are escaped
# because "c++" and "c#" contain regex metacharacters. The trailing lookahead
# is used instead of `\b`, which does not match between "+"/"#" and a
# following space or the end of the text.
_LANGUAGE_MENTION = re.compile(
    r"\b(in|using|for)\s+("
    + "|".join(re.escape(name) for name in _KNOWN_LANGUAGES)
    + r")(?![\w+#])"
)


@app.route("/chat", methods=["POST"])
def chat():
    """Answer the latest user message with the programming-assistant LLM.

    Request JSON:
        chat_history: list of ``{"role": ..., "content": ...}`` messages; the
            last non-empty ``user`` message is the one answered.
        assistant_state: optional state object returned by an earlier call.

    Response JSON:
        assistant_reply: the reply text (never empty).
        updated_state: the state to send back on the next request.
        suggested_tags: tags proposed by the LLM for this reply.

    Returns status 400 for a body that is not a JSON object and 500 when the
    LLM call fails.
    """
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "invalid request body"}), 400

    # Both fields come from the client; fall back to empty values when they
    # are missing or of the wrong type instead of failing later.
    chat_history = data.get("chat_history")
    if not isinstance(chat_history, list):
        chat_history = []
    assistant_state = data.get("assistant_state")
    if not isinstance(assistant_state, dict):
        assistant_state = {}

    # Rebuild the state from known keys only, dropping anything unexpected.
    state: AssistantState = {
        "conversationSummary": assistant_state.get("conversationSummary", ""),
        "lastUserMessage": "",
        "language": assistant_state.get("language", "Python"),  # Default to Python
        "taggedReplies": assistant_state.get("taggedReplies", []),
    }

    # Find the last user message.
    for msg in reversed(chat_history):
        if not isinstance(msg, dict):
            continue
        content = msg.get("content")
        if msg.get("role") == "user" and isinstance(content, str) and content:
            state["lastUserMessage"] = content
            break

    # --- Language detection (keyword match on the last message) ---
    lang_match = _LANGUAGE_MENTION.search(state["lastUserMessage"].lower())
    if lang_match:
        detected_lang = lang_match.group(2).capitalize()
        if detected_lang != state["language"]:
            logger.info("Detected new language: %s", detected_lang)
            state["language"] = detected_lang

    # --- LLM prompt construction ---
    if state["language"]:
        action_hint = f"Focus your answer on the {state['language']} programming language. If the user asks a conceptual question, use {state['language']} for examples."
    else:
        action_hint = "The current language is unknown. Please ask the user to specify the programming language they are working in."

    state_json = json.dumps({"language": state["language"], "summary": state["conversationSummary"]})
    user_prompt = f"""
Current State: {state_json}
Last user message: {state["lastUserMessage"]}

SYSTEM_HINT: {action_hint}

Return ONLY valid JSON with keys: assistant_reply, state_updates, suggested_tags.
"""

    messages = [
        {"role": "system", "content": PROGRAMMING_ASSISTANT_PROMPT},
        {"role": "user", "content": user_prompt}
    ]

    try:
        logger.info("Invoking LLM for code assistant...")
        llm_response = llm.invoke(messages)
        raw_response = llm_response.content if hasattr(llm_response, "content") else str(llm_response)

        logger.info("Raw LLM response: %s...", str(raw_response)[:200])
        parsed_result = extract_json_from_llm_response(raw_response)

    except Exception as e:
        logger.exception("LLM invocation failed")
        return jsonify({"error": "LLM invocation failed", "detail": str(e)}), 500

    # --- State update from LLM ---
    # The LLM output is untrusted: accept only well-typed values for the two
    # fields it is allowed to modify.
    state_updates = parsed_result.get("state_updates")
    if not isinstance(state_updates, dict):
        state_updates = {}
    new_summary = state_updates.get("conversationSummary")
    if isinstance(new_summary, str):
        state["conversationSummary"] = new_summary
    new_language = state_updates.get("language")
    if isinstance(new_language, str) and new_language.strip():
        state["language"] = new_language.strip()

    assistant_reply = parsed_result.get("assistant_reply")
    if not assistant_reply or not isinstance(assistant_reply, str) or not assistant_reply.strip():
        assistant_reply = "I'm here to help with your code! What programming language are you using?"

    suggested_tags = parsed_result.get("suggested_tags")
    if not isinstance(suggested_tags, list):
        suggested_tags = []

    # --- Final response payload ---
    response_payload = {
        "assistant_reply": assistant_reply,
        "updated_state": state,
        "suggested_tags": suggested_tags,  # Pass tags to frontend
    }

    return jsonify(response_payload)
|
| 214 |
|
| 215 |
+
# --- New Route for Tagging/Bookmarking Replies ---
|
| 216 |
+
@app.route("/tag_reply", methods=["POST"])
def tag_reply():
    """Bookmark an assistant reply under one or more tags.

    Request JSON:
        reply: the reply text to save (non-empty string).
        tags: a tag string or a list of tags; blank entries are dropped.
        assistant_state: optional current state object from the client.

    Response JSON (status 200):
        message: confirmation text.
        updated_state: the state with the new entry appended to
            ``taggedReplies``.

    Returns status 400 when the body is not a JSON object or when ``reply``
    or ``tags`` is missing or malformed.
    """
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "invalid request body"}), 400

    reply_content = data.get("reply")
    tags = data.get("tags")

    if not reply_content or not tags:
        return jsonify({"error": "Missing 'reply' or 'tags' in request"}), 400
    if not isinstance(reply_content, str):
        return jsonify({"error": "'reply' must be a string"}), 400

    # A bare string would otherwise be split into one tag per character.
    if isinstance(tags, str):
        tags = [tags]
    elif not isinstance(tags, (list, tuple)):
        return jsonify({"error": "'tags' must be a string or a list of strings"}), 400

    # Ensure tags is a list of non-blank strings.
    tags = [text for text in (str(t).strip() for t in tags) if text]
    if not tags:
        return jsonify({"error": "Tags list cannot be empty"}), 400

    assistant_state = data.get("assistant_state")
    if not isinstance(assistant_state, dict):
        assistant_state = {}

    # Copy the saved replies so the request payload is not mutated, and
    # ignore a value that is not a list (e.g. null).
    previous = assistant_state.get("taggedReplies")
    tagged_replies = list(previous) if isinstance(previous, list) else []

    new_tagged_reply: TaggedReply = {
        "reply": reply_content,
        "tags": tags,
    }
    tagged_replies.append(new_tagged_reply)

    # Rebuild the state from known keys only.
    state: AssistantState = {
        "conversationSummary": assistant_state.get("conversationSummary", ""),
        "lastUserMessage": "",
        "language": assistant_state.get("language", "Python"),
        "taggedReplies": tagged_replies,
    }

    logger.info("Reply tagged with: %s", tags)

    return jsonify({
        "message": "Reply saved and tagged successfully.",
        "updated_state": state,
    }), 200
|
| 256 |
+
|
| 257 |
+
# --- Filtering/Search Route for Bookmarked Replies ---
|
| 258 |
+
@app.route("/search_tags", methods=["GET", "POST"])
def search_tags():
    """Filter the client's bookmarked replies by tag.

    The server keeps no state, so the saved replies must be supplied by the
    client: send a POST whose JSON body contains ``assistant_state`` (with
    its ``taggedReplies`` list). The search term is read from the ``tag``
    query parameter or, failing that, from a ``tag`` key in the body.
    Matching is a case-insensitive substring test against each tag.

    Response JSON (status 200):
        tag_query: the search term used ("" when none was given).
        results: the matching saved replies; all of them when no term is
            given.

    A GET request is answered with status 405 and a hint to use POST; a POST
    body that is not a JSON object yields status 400.
    """
    if request.method == "GET":
        return jsonify({"error": "Please use POST and include 'assistant_state' in the body for tag search."}), 405

    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "invalid request body"}), 400

    assistant_state = data.get("assistant_state")
    if not isinstance(assistant_state, dict):
        assistant_state = {}

    # Keep only well-formed entries so one bad item cannot break the search.
    stored = assistant_state.get("taggedReplies")
    tagged_replies = [
        entry for entry in stored
        if isinstance(entry, dict) and isinstance(entry.get("tags"), list)
    ] if isinstance(stored, list) else []

    tag_query = request.args.get("tag") or data.get("tag")
    if not tag_query:
        # Return all tagged replies if no query
        return jsonify({"tag_query": "", "results": tagged_replies}), 200

    tag_query = str(tag_query)
    tag_query_lower = tag_query.lower()

    filtered_results = [
        entry for entry in tagged_replies
        if any(tag_query_lower in str(tag).lower() for tag in entry["tags"])
    ]

    return jsonify({
        "tag_query": tag_query,
        "results": filtered_results
    }), 200
|
| 301 |
|
| 302 |
@app.route("/ping", methods=["GET"])
|
| 303 |
def ping():
|
|
|
|
| 305 |
|
| 306 |
if __name__ == "__main__":
    port = int(os.getenv("PORT", 7860))
    # The Werkzeug debugger allows arbitrary code execution from the browser,
    # so it must not be always-on for a server bound to every interface.
    # Opt in explicitly with FLASK_DEBUG=1 during local development.
    debug = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=port, debug=debug)
|