Update app.py
Browse files
app.py
CHANGED
|
@@ -25,14 +25,8 @@ for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
|
|
| 25 |
os.environ["HF_HOME"] = model_cache_dir
|
| 26 |
os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
|
| 27 |
|
| 28 |
-
# Fixed this line - added missing closing parenthesis
|
| 29 |
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "src")))
|
| 30 |
-
|
| 31 |
-
try:
|
| 32 |
-
from txagent.txagent import TxAgent
|
| 33 |
-
except ImportError as e:
|
| 34 |
-
print(f"Error importing TxAgent: {e}")
|
| 35 |
-
sys.exit(1)
|
| 36 |
|
| 37 |
MAX_MODEL_TOKENS = 32768
|
| 38 |
MAX_CHUNK_TOKENS = 8192
|
|
@@ -48,37 +42,40 @@ def clean_response(text: str) -> str:
|
|
| 48 |
def estimate_tokens(text: str) -> int:
|
| 49 |
return len(text) // 3.5 + 1
|
| 50 |
|
| 51 |
-
def extract_text_from_excel(file_obj
|
| 52 |
-
"""
|
| 53 |
all_text = []
|
| 54 |
try:
|
| 55 |
-
# Handle
|
| 56 |
-
if hasattr(file_obj, 'name'):
|
| 57 |
file_path = file_obj.name
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
else: # File-like object
|
| 61 |
file_path = file_obj
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# Verify file exists
|
| 64 |
if not os.path.exists(file_path):
|
| 65 |
-
raise
|
| 66 |
|
| 67 |
xls = pd.ExcelFile(file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
except Exception as e:
|
| 69 |
-
raise ValueError(f"β Error
|
| 70 |
-
|
| 71 |
-
for sheet_name in xls.sheet_names:
|
| 72 |
-
try:
|
| 73 |
-
df = xls.parse(sheet_name).astype(str).fillna("")
|
| 74 |
-
rows = df.apply(lambda row: " | ".join([cell for cell in row if cell.strip()]), axis=1)
|
| 75 |
-
sheet_text = [f"[{sheet_name}] {line}" for line in rows if line.strip()]
|
| 76 |
-
all_text.extend(sheet_text)
|
| 77 |
-
except Exception as e:
|
| 78 |
-
print(f"Warning: Could not parse sheet {sheet_name}: {e}")
|
| 79 |
-
continue
|
| 80 |
-
|
| 81 |
-
return "\n".join(all_text)
|
| 82 |
def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_chunks: int = 30) -> List[str]:
|
| 83 |
effective_max = max_tokens - PROMPT_OVERHEAD
|
| 84 |
lines, chunks, curr_chunk, curr_tokens = text.split("\n"), [], [], 0
|
|
@@ -119,7 +116,6 @@ Respond in well-structured bullet points with medical reasoning.
|
|
| 119 |
def init_agent():
|
| 120 |
tool_path = os.path.join(tool_cache_dir, "new_tool.json")
|
| 121 |
if not os.path.exists(tool_path):
|
| 122 |
-
# Create default tool file if it doesn't exist
|
| 123 |
default_tool = {
|
| 124 |
"name": "new_tool",
|
| 125 |
"description": "Default tool configuration",
|
|
@@ -141,24 +137,19 @@ def init_agent():
|
|
| 141 |
agent.init_model()
|
| 142 |
return agent
|
| 143 |
|
| 144 |
-
def stream_report(agent, input_file
|
| 145 |
accumulated_text = ""
|
| 146 |
try:
|
| 147 |
if input_file is None:
|
| 148 |
yield "β Please upload a valid Excel file.", None, ""
|
| 149 |
return
|
| 150 |
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
elif isinstance(input_file, str):
|
| 157 |
-
if not os.path.exists(input_file):
|
| 158 |
-
yield "β The specified file path does not exist.", None, ""
|
| 159 |
-
return
|
| 160 |
|
| 161 |
-
text = extract_text_from_excel(input_file)
|
| 162 |
chunks = split_text_into_chunks(text)
|
| 163 |
|
| 164 |
for i, chunk in enumerate(chunks):
|
|
@@ -198,7 +189,8 @@ def stream_report(agent, input_file: Union[str, BinaryIO], full_output: str) ->
|
|
| 198 |
yield accumulated_text, report_path, cleaned
|
| 199 |
|
| 200 |
except Exception as e:
|
| 201 |
-
yield f"β
|
|
|
|
| 202 |
def create_ui(agent):
|
| 203 |
with gr.Blocks(css="""
|
| 204 |
body {
|
|
|
|
| 25 |
os.environ["HF_HOME"] = model_cache_dir
|
| 26 |
os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
|
| 27 |
|
|
|
|
| 28 |
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "src")))
|
| 29 |
+
from txagent.txagent import TxAgent
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
MAX_MODEL_TOKENS = 32768
|
| 32 |
MAX_CHUNK_TOKENS = 8192
|
|
|
|
| 42 |
def estimate_tokens(text: str) -> int:
    """Roughly estimate the LLM token count of *text*.

    Uses the common ~3.5 characters-per-token heuristic; the ``+ 1``
    guarantees a non-zero estimate even for empty input.
    """
    # Bug fix: ``len(text) // 3.5`` is floor division by a float, which
    # yields a float (e.g. 2.0) and violates the declared ``-> int`` return
    # type. ``int(... / 3.5)`` truncates to the same floor value but
    # actually returns an int.
    return int(len(text) / 3.5) + 1
|
| 44 |
|
| 45 |
+
def extract_text_from_excel(file_obj) -> str:
    """Extract all cell text from an Excel workbook as one newline-joined string.

    Accepts either a Gradio file object (anything exposing the uploaded
    temp-file path via a ``.name`` attribute) or a direct filesystem path
    (``str`` / ``os.PathLike``). Every non-empty row of every sheet is
    rendered as ``[sheet_name] cell | cell | ...``.

    Sheets that fail to parse are skipped with a printed warning rather
    than aborting the whole extraction (deliberate best-effort behavior).

    Raises:
        ValueError: for an unsupported input type, a missing file, or any
            underlying pandas/IO failure. The original exception is chained
            via ``from`` so the root cause remains in the traceback.
    """
    all_text = []
    try:
        # Gradio upload objects carry the temp-file path in .name
        if hasattr(file_obj, 'name'):
            file_path = file_obj.name
        # A plain path passed directly
        elif isinstance(file_obj, (str, os.PathLike)):
            file_path = file_obj
        else:
            raise ValueError("Unsupported file input type")

        # Verify the file exists before handing it to pandas
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found at path: {file_path}")

        xls = pd.ExcelFile(file_path)

        for sheet_name in xls.sheet_names:
            try:
                # Stringify all cells; fillna("") so empty cells drop out below
                df = xls.parse(sheet_name).astype(str).fillna("")
                rows = df.apply(lambda row: " | ".join([cell for cell in row if cell.strip()]), axis=1)
                sheet_text = [f"[{sheet_name}] {line}" for line in rows if line.strip()]
                all_text.extend(sheet_text)
            except Exception as e:
                # Best-effort: one bad sheet must not kill the whole file
                print(f"Warning: Could not parse sheet {sheet_name}: {e}")
                continue

        return "\n".join(all_text)

    except Exception as e:
        # Bug fix: chain with ``from e`` so the original cause is not lost
        # when everything is re-wrapped as a ValueError for the caller.
        raise ValueError(f"β Error processing Excel file: {str(e)}") from e
|
| 78 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_chunks: int = 30) -> List[str]:
|
| 80 |
effective_max = max_tokens - PROMPT_OVERHEAD
|
| 81 |
lines, chunks, curr_chunk, curr_tokens = text.split("\n"), [], [], 0
|
|
|
|
| 116 |
def init_agent():
|
| 117 |
tool_path = os.path.join(tool_cache_dir, "new_tool.json")
|
| 118 |
if not os.path.exists(tool_path):
|
|
|
|
| 119 |
default_tool = {
|
| 120 |
"name": "new_tool",
|
| 121 |
"description": "Default tool configuration",
|
|
|
|
| 137 |
agent.init_model()
|
| 138 |
return agent
|
| 139 |
|
| 140 |
+
def stream_report(agent, input_file, full_output: str) -> Generator[Tuple[str, Union[str, None], str], None, None]:
|
| 141 |
accumulated_text = ""
|
| 142 |
try:
|
| 143 |
if input_file is None:
|
| 144 |
yield "β Please upload a valid Excel file.", None, ""
|
| 145 |
return
|
| 146 |
|
| 147 |
+
try:
|
| 148 |
+
text = extract_text_from_excel(input_file)
|
| 149 |
+
except Exception as e:
|
| 150 |
+
yield f"β {str(e)}", None, ""
|
| 151 |
+
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
|
|
|
| 153 |
chunks = split_text_into_chunks(text)
|
| 154 |
|
| 155 |
for i, chunk in enumerate(chunks):
|
|
|
|
| 189 |
yield accumulated_text, report_path, cleaned
|
| 190 |
|
| 191 |
except Exception as e:
|
| 192 |
+
yield f"β Unexpected error: {str(e)}", None, ""
|
| 193 |
+
|
| 194 |
def create_ui(agent):
|
| 195 |
with gr.Blocks(css="""
|
| 196 |
body {
|