Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- CLAUDE.md +15 -0
- __pycache__/app.cpython-311.pyc +0 -0
- app.py +27 -52
- requirements.txt +1 -1
CLAUDE.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CatLLM HuggingFace Space
|
| 2 |
+
|
| 3 |
+
## Deployment
|
| 4 |
+
|
| 5 |
+
Push to: https://huggingface.co/spaces/CatLLM/survey-classifier
|
| 6 |
+
|
| 7 |
+
## Authentication
|
| 8 |
+
|
| 9 |
+
The HuggingFace token is stored in the `.env` file in this directory:
|
| 10 |
+
- Variable: `CATLLM_HF_TOKEN`
|
| 11 |
+
- Used for pushing to the CatLLM organization space
|
| 12 |
+
|
| 13 |
+
## Architecture
|
| 14 |
+
|
| 15 |
+
The HuggingFace app should always use the `catllm` Python package for core functions (classification, extraction, etc.). Do not duplicate catllm logic in the app—import and call `catllm.classify()` and `catllm.extract()` directly. If new functionality is needed (e.g., progress callbacks), add it to the catllm package first, then use it in the app.
|
__pycache__/app.cpython-311.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
|
|
|
app.py
CHANGED
|
@@ -547,18 +547,15 @@ def run_classify_data(input_type, input_data, description, categories,
|
|
| 547 |
start_time = time.time()
|
| 548 |
|
| 549 |
classify_kwargs = {
|
| 550 |
-
'
|
| 551 |
'categories': categories,
|
| 552 |
-
'
|
| 553 |
-
'
|
| 554 |
-
'description': description,
|
| 555 |
-
'user_model': model,
|
| 556 |
-
'model_source': model_source
|
| 557 |
}
|
| 558 |
if mode:
|
| 559 |
-
classify_kwargs['
|
| 560 |
|
| 561 |
-
result = catllm.
|
| 562 |
|
| 563 |
processing_time = time.time() - start_time
|
| 564 |
num_items = len(result)
|
|
@@ -1061,15 +1058,12 @@ with col_input:
|
|
| 1061 |
status_text.text(f"Processing page {current_idx+1} of {total_pages} ({page_label}) ({progress*100:.0f}%){eta_str}")
|
| 1062 |
|
| 1063 |
try:
|
| 1064 |
-
result_df = catllm.
|
| 1065 |
-
|
| 1066 |
categories=categories_entered,
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
|
| 1070 |
-
user_model=model,
|
| 1071 |
-
model_source=model_source,
|
| 1072 |
-
mode=mode,
|
| 1073 |
progress_callback=pdf_progress_callback
|
| 1074 |
)
|
| 1075 |
|
|
@@ -1100,46 +1094,27 @@ with col_input:
|
|
| 1100 |
all_results = []
|
| 1101 |
|
| 1102 |
else:
|
| 1103 |
-
# Non-PDF processing (text, images) -
|
| 1104 |
-
all_results = []
|
| 1105 |
total_items = len(items_list)
|
|
|
|
| 1106 |
|
| 1107 |
-
|
| 1108 |
-
|
| 1109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1110 |
|
| 1111 |
-
|
| 1112 |
-
|
| 1113 |
-
|
| 1114 |
-
eta_seconds = avg_time * (total_items - i)
|
| 1115 |
-
eta_str = f" | ETA: {eta_seconds:.0f}s" if eta_seconds < 60 else f" | ETA: {eta_seconds/60:.1f}m"
|
| 1116 |
-
else:
|
| 1117 |
-
eta_str = ""
|
| 1118 |
|
| 1119 |
-
|
| 1120 |
-
|
| 1121 |
-
|
| 1122 |
-
|
| 1123 |
-
input_data=[item],
|
| 1124 |
-
categories=categories_entered,
|
| 1125 |
-
api_key=actual_api_key,
|
| 1126 |
-
input_type=input_type_selected,
|
| 1127 |
-
description=description,
|
| 1128 |
-
user_model=model,
|
| 1129 |
-
model_source=model_source
|
| 1130 |
-
)
|
| 1131 |
-
all_results.append(item_result)
|
| 1132 |
-
|
| 1133 |
-
progress = (i + 1) / total_items if total_items > 0 else 1.0
|
| 1134 |
-
progress_bar.progress(min(progress, 1.0))
|
| 1135 |
-
|
| 1136 |
-
except Exception as e:
|
| 1137 |
-
st.warning(f"Error on item {i+1}: {str(e)}")
|
| 1138 |
-
continue
|
| 1139 |
-
|
| 1140 |
-
processing_time = time.time() - start_time
|
| 1141 |
-
progress_bar.progress(1.0)
|
| 1142 |
-
status_text.text(f"Completed {total_items} items in {processing_time:.1f}s")
|
| 1143 |
|
| 1144 |
if all_results:
|
| 1145 |
# Combine results
|
|
|
|
| 547 |
start_time = time.time()
|
| 548 |
|
| 549 |
classify_kwargs = {
|
| 550 |
+
'survey_input': input_data,
|
| 551 |
'categories': categories,
|
| 552 |
+
'models': [(model, model_source, actual_api_key)],
|
| 553 |
+
'input_description': description,
|
|
|
|
|
|
|
|
|
|
| 554 |
}
|
| 555 |
if mode:
|
| 556 |
+
classify_kwargs['pdf_mode'] = mode
|
| 557 |
|
| 558 |
+
result = catllm.multi_class_ensemble(**classify_kwargs)
|
| 559 |
|
| 560 |
processing_time = time.time() - start_time
|
| 561 |
num_items = len(result)
|
|
|
|
| 1058 |
status_text.text(f"Processing page {current_idx+1} of {total_pages} ({page_label}) ({progress*100:.0f}%){eta_str}")
|
| 1059 |
|
| 1060 |
try:
|
| 1061 |
+
result_df = catllm.multi_class_ensemble(
|
| 1062 |
+
survey_input=items_list,
|
| 1063 |
categories=categories_entered,
|
| 1064 |
+
models=[(model, model_source, actual_api_key)],
|
| 1065 |
+
input_description=description,
|
| 1066 |
+
pdf_mode=mode,
|
|
|
|
|
|
|
|
|
|
| 1067 |
progress_callback=pdf_progress_callback
|
| 1068 |
)
|
| 1069 |
|
|
|
|
| 1094 |
all_results = []
|
| 1095 |
|
| 1096 |
else:
|
| 1097 |
+
# Non-PDF processing (text, images) - process all at once
|
|
|
|
| 1098 |
total_items = len(items_list)
|
| 1099 |
+
status_text.text(f"Processing {total_items} items...")
|
| 1100 |
|
| 1101 |
+
try:
|
| 1102 |
+
result_df = catllm.multi_class_ensemble(
|
| 1103 |
+
survey_input=items_list,
|
| 1104 |
+
categories=categories_entered,
|
| 1105 |
+
models=[(model, model_source, actual_api_key)],
|
| 1106 |
+
input_description=description,
|
| 1107 |
+
)
|
| 1108 |
+
all_results = [result_df]
|
| 1109 |
|
| 1110 |
+
processing_time = time.time() - start_time
|
| 1111 |
+
progress_bar.progress(1.0)
|
| 1112 |
+
status_text.text(f"Completed {total_items} items in {processing_time:.1f}s")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1113 |
|
| 1114 |
+
except Exception as e:
|
| 1115 |
+
st.error(f"Error: {str(e)}")
|
| 1116 |
+
all_results = []
|
| 1117 |
+
processing_time = time.time() - start_time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1118 |
|
| 1119 |
if all_results:
|
| 1120 |
# Combine results
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
streamlit>=1.32.0
|
| 2 |
-
cat-llm[pdf]>=0.1.
|
| 3 |
mistralai
|
| 4 |
pydantic==2.10.6
|
| 5 |
huggingface_hub<0.27.0
|
|
|
|
| 1 |
streamlit>=1.32.0
|
| 2 |
+
cat-llm[pdf]>=0.1.9
|
| 3 |
mistralai
|
| 4 |
pydantic==2.10.6
|
| 5 |
huggingface_hub<0.27.0
|