Spaces:
Paused
Paused
Update app.py via AI Editor
Browse files
app.py
CHANGED
|
@@ -16,6 +16,12 @@ import datetime
|
|
| 16 |
from werkzeug.utils import secure_filename
|
| 17 |
import numpy as np
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(threadName)s %(message)s")
|
| 20 |
logger = logging.getLogger("AskTricare")
|
| 21 |
|
|
@@ -231,7 +237,6 @@ def left_navbar_static():
|
|
| 231 |
], style={"padding": "1rem", "backgroundColor": "#f8f9fa", "height": "100vh", "overflowY": "auto"})
|
| 232 |
|
| 233 |
def chat_box_card():
|
| 234 |
-
# Explicit scrollbars and height
|
| 235 |
return dbc.Card(
|
| 236 |
dbc.CardBody([
|
| 237 |
html.Div(id="chat-window", style={
|
|
@@ -326,18 +331,46 @@ app.clientside_callback(
|
|
| 326 |
|
| 327 |
def _is_supported_doc(filename):
|
| 328 |
ext = os.path.splitext(filename)[1].lower()
|
| 329 |
-
return ext in [".txt", ".pdf", ".md", ".docx"]
|
| 330 |
|
| 331 |
def _extract_text_from_upload(filepath, ext):
|
| 332 |
-
|
| 333 |
-
|
| 334 |
with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
|
| 335 |
text = f.read()
|
| 336 |
return text
|
| 337 |
-
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
return ""
|
| 340 |
-
|
|
|
|
| 341 |
return ""
|
| 342 |
|
| 343 |
@app.callback(
|
|
|
|
| 16 |
from werkzeug.utils import secure_filename
|
| 17 |
import numpy as np
|
| 18 |
|
| 19 |
+
import io
|
| 20 |
+
|
| 21 |
+
from pdfminer.high_level import extract_text as pdf_extract_text
|
| 22 |
+
import docx
|
| 23 |
+
import openpyxl
|
| 24 |
+
|
| 25 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(threadName)s %(message)s")
|
| 26 |
logger = logging.getLogger("AskTricare")
|
| 27 |
|
|
|
|
| 237 |
], style={"padding": "1rem", "backgroundColor": "#f8f9fa", "height": "100vh", "overflowY": "auto"})
|
| 238 |
|
| 239 |
def chat_box_card():
|
|
|
|
| 240 |
return dbc.Card(
|
| 241 |
dbc.CardBody([
|
| 242 |
html.Div(id="chat-window", style={
|
|
|
|
| 331 |
|
| 332 |
def _is_supported_doc(filename):
    """Return True when *filename* has an extension the text extractor handles.

    Only the final extension is inspected and the comparison is
    case-insensitive. A missing filename (``None`` or an empty string) is
    reported as unsupported instead of raising.
    """
    if not filename:
        # os.path.splitext raises TypeError on None; there is nothing to
        # inspect in that case, so treat it as "not supported".
        return False
    ext = os.path.splitext(filename)[1].lower()
    return ext in (".txt", ".pdf", ".md", ".docx", ".xlsx")
|
| 335 |
|
| 336 |
def _extract_text_from_upload(filepath, ext):
    """Return the plain text of an uploaded document.

    Args:
        filepath: Path of the uploaded file on disk.
        ext: File extension including the leading dot (for example
            ``".pdf"``). Matched case-insensitively.

    Returns:
        The extracted text. An empty string is returned when the extension
        is not one of .txt, .md, .pdf, .docx or .xlsx, or when reading the
        file fails; failures are logged and never raised to the caller.
    """
    # Normalise so ".PDF" and ".pdf" take the same branch; a non-string
    # extension simply falls through to the "unsupported" result.
    ext = ext.lower() if isinstance(ext, str) else ""
    try:
        if ext in (".txt", ".md"):
            # Undecodable bytes are dropped rather than failing the upload.
            with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()

        if ext == ".pdf":
            try:
                return pdf_extract_text(filepath)
            except Exception as e:
                logger.error("Error reading PDF %s: %s", filepath, e)
                return ""

        if ext == ".docx":
            try:
                doc = docx.Document(filepath)
                # Skip paragraphs that contain only whitespace.
                paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
                return "\n".join(paragraphs)
            except Exception as e:
                logger.error("Error reading DOCX %s: %s", filepath, e)
                return ""

        if ext == ".xlsx":
            try:
                wb = openpyxl.load_workbook(filepath, read_only=True, data_only=True)
                try:
                    text_rows = []
                    for ws in wb.worksheets:
                        for row in ws.iter_rows(values_only=True):
                            # Empty cells are omitted; remaining values are
                            # tab-separated, one output line per sheet row.
                            row_strs = [str(cell) for cell in row if cell is not None]
                            if any(row_strs):
                                text_rows.append("\t".join(row_strs))
                    return "\n".join(text_rows)
                finally:
                    # Read-only workbooks keep the underlying file open
                    # until they are closed explicitly.
                    wb.close()
            except Exception as e:
                logger.error("Error reading XLSX %s: %s", filepath, e)
                return ""

        # Unsupported extension.
        return ""
    except Exception as e:
        logger.error("Error extracting text from %s: %s", filepath, e)
        return ""
|
| 375 |
|
| 376 |
@app.callback(
|