Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- app.py +43 -40
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -259,78 +259,77 @@ print(result["counts_df"])
|
|
| 259 |
'''
|
| 260 |
|
| 261 |
|
| 262 |
-
def generate_classify_code(input_type, description, categories, model, model_source, mode=None):
|
| 263 |
"""Generate Python code for classification."""
|
| 264 |
categories_str = ",\n ".join([f'"{cat}"' for cat in categories])
|
| 265 |
|
|
|
|
| 266 |
if input_type == "text":
|
| 267 |
-
|
| 268 |
-
import pandas as pd
|
| 269 |
|
| 270 |
# Load your data
|
| 271 |
df = pd.read_csv("your_data.csv")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
# Define categories
|
| 274 |
categories = [
|
| 275 |
{categories_str}
|
| 276 |
]
|
| 277 |
|
| 278 |
-
# Classify
|
| 279 |
result = catllm.classify(
|
| 280 |
-
input_data=
|
| 281 |
categories=categories,
|
| 282 |
api_key="YOUR_API_KEY",
|
| 283 |
-
input_type="text",
|
| 284 |
description="{description}",
|
| 285 |
-
user_model="{model}"
|
| 286 |
-
model_source="{model_source}"
|
| 287 |
)
|
| 288 |
|
| 289 |
# View results
|
| 290 |
print(result)
|
| 291 |
result.to_csv("classified_results.csv", index=False)
|
| 292 |
'''
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
{categories_str}
|
| 300 |
-
]
|
| 301 |
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
input_data="path/to/your/pdfs/",
|
| 305 |
-
categories=categories,
|
| 306 |
-
api_key="YOUR_API_KEY",
|
| 307 |
-
input_type="pdf",
|
| 308 |
-
description="{description}"{mode_line},
|
| 309 |
-
user_model="{model}",
|
| 310 |
-
model_source="{model_source}"
|
| 311 |
-
)
|
| 312 |
|
| 313 |
-
# View results
|
| 314 |
-
print(result)
|
| 315 |
-
result.to_csv("classified_results.csv", index=False)
|
| 316 |
-
'''
|
| 317 |
-
else: # image
|
| 318 |
return f'''import catllm
|
| 319 |
-
|
| 320 |
# Define categories
|
| 321 |
categories = [
|
| 322 |
{categories_str}
|
| 323 |
]
|
| 324 |
|
| 325 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
result = catllm.classify(
|
| 327 |
-
input_data=
|
| 328 |
categories=categories,
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
description="{description}",
|
| 332 |
-
user_model="{model}",
|
| 333 |
-
model_source="{model_source}"
|
| 334 |
)
|
| 335 |
|
| 336 |
# View results
|
|
@@ -1344,7 +1343,11 @@ with col_input:
|
|
| 1344 |
)
|
| 1345 |
|
| 1346 |
# Generate code
|
| 1347 |
-
code = generate_classify_code(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1348 |
|
| 1349 |
st.session_state.results = {
|
| 1350 |
'df': result_df,
|
|
|
|
| 259 |
'''
|
| 260 |
|
| 261 |
|
| 262 |
+
def generate_classify_code(input_type, description, categories, model, model_source, mode=None, classify_mode="Single Model", models_list=None):
|
| 263 |
"""Generate Python code for classification."""
|
| 264 |
categories_str = ",\n ".join([f'"{cat}"' for cat in categories])
|
| 265 |
|
| 266 |
+
# Determine input data placeholder based on type
|
| 267 |
if input_type == "text":
|
| 268 |
+
input_placeholder = 'df["your_column"].tolist()'
|
| 269 |
+
load_data = '''import pandas as pd
|
| 270 |
|
| 271 |
# Load your data
|
| 272 |
df = pd.read_csv("your_data.csv")
|
| 273 |
+
'''
|
| 274 |
+
elif input_type == "pdf":
|
| 275 |
+
input_placeholder = '"path/to/your/pdfs/"'
|
| 276 |
+
load_data = ''
|
| 277 |
+
else: # image
|
| 278 |
+
input_placeholder = '"path/to/your/images/"'
|
| 279 |
+
load_data = ''
|
| 280 |
|
| 281 |
+
# Generate code based on classification mode
|
| 282 |
+
if classify_mode == "Single Model":
|
| 283 |
+
# Single model mode
|
| 284 |
+
mode_param = f',\n mode="{mode}"' if mode and input_type == "pdf" else ''
|
| 285 |
+
return f'''import catllm
|
| 286 |
+
{load_data}
|
| 287 |
# Define categories
|
| 288 |
categories = [
|
| 289 |
{categories_str}
|
| 290 |
]
|
| 291 |
|
| 292 |
+
# Classify data (input type is auto-detected)
|
| 293 |
result = catllm.classify(
|
| 294 |
+
input_data={input_placeholder},
|
| 295 |
categories=categories,
|
| 296 |
api_key="YOUR_API_KEY",
|
|
|
|
| 297 |
description="{description}",
|
| 298 |
+
user_model="{model}"{mode_param}
|
|
|
|
| 299 |
)
|
| 300 |
|
| 301 |
# View results
|
| 302 |
print(result)
|
| 303 |
result.to_csv("classified_results.csv", index=False)
|
| 304 |
'''
|
| 305 |
+
else:
|
| 306 |
+
# Multi-model mode (Comparison or Ensemble)
|
| 307 |
+
if models_list:
|
| 308 |
+
models_str = ",\n ".join([f'("{m}", "auto", "YOUR_API_KEY")' for m in models_list])
|
| 309 |
+
else:
|
| 310 |
+
models_str = '("gpt-4o", "auto", "YOUR_API_KEY"),\n ("claude-sonnet-4-5-20250929", "auto", "YOUR_API_KEY")'
|
|
|
|
|
|
|
| 311 |
|
| 312 |
+
mode_param = f',\n mode="{mode}"' if mode and input_type == "pdf" else ''
|
| 313 |
+
consensus_param = ',\n consensus_threshold=0.5' if classify_mode == "Ensemble" else ''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
return f'''import catllm
|
| 316 |
+
{load_data}
|
| 317 |
# Define categories
|
| 318 |
categories = [
|
| 319 |
{categories_str}
|
| 320 |
]
|
| 321 |
|
| 322 |
+
# Define models for {"ensemble voting" if classify_mode == "Ensemble" else "comparison"}
|
| 323 |
+
models = [
|
| 324 |
+
{models_str}
|
| 325 |
+
]
|
| 326 |
+
|
| 327 |
+
# Classify with multiple models
|
| 328 |
result = catllm.classify(
|
| 329 |
+
input_data={input_placeholder},
|
| 330 |
categories=categories,
|
| 331 |
+
models=models,
|
| 332 |
+
description="{description}"{mode_param}{consensus_param}
|
|
|
|
|
|
|
|
|
|
| 333 |
)
|
| 334 |
|
| 335 |
# View results
|
|
|
|
| 1343 |
)
|
| 1344 |
|
| 1345 |
# Generate code
|
| 1346 |
+
code = generate_classify_code(
|
| 1347 |
+
input_type_selected, description, categories_entered,
|
| 1348 |
+
report_model, report_model_source, mode,
|
| 1349 |
+
classify_mode=classify_mode, models_list=models_list
|
| 1350 |
+
)
|
| 1351 |
|
| 1352 |
st.session_state.results = {
|
| 1353 |
'df': result_df,
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
streamlit>=1.32.0
|
| 2 |
-
cat-llm[pdf]>=0.1.
|
| 3 |
mistralai
|
| 4 |
pydantic==2.10.6
|
| 5 |
huggingface_hub<0.27.0
|
|
|
|
| 1 |
streamlit>=1.32.0
|
| 2 |
+
cat-llm[pdf]>=0.1.14
|
| 3 |
mistralai
|
| 4 |
pydantic==2.10.6
|
| 5 |
huggingface_hub<0.27.0
|