chrissoria committed on
Commit
f3d18a8
·
verified ·
1 Parent(s): b4d8ab0

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. CLAUDE.md +15 -0
  2. __pycache__/app.cpython-311.pyc +0 -0
  3. app.py +27 -52
  4. requirements.txt +1 -1
CLAUDE.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CatLLM HuggingFace Space
2
+
3
+ ## Deployment
4
+
5
+ Push to: https://huggingface.co/spaces/CatLLM/survey-classifier
6
+
7
+ ## Authentication
8
+
9
+ HuggingFace token is stored in `.env` in this directory:
10
+ - Variable: `CATLLM_HF_TOKEN`
11
+ - Used for pushing to the CatLLM organization space
12
+
13
+ ## Architecture
14
+
15
+ The HuggingFace app should always use the `catllm` Python package for core functions (classification, extraction, etc.). Do not duplicate catllm logic in the app—import and call `catllm.classify()` and `catllm.extract()` directly. If new functionality is needed (e.g., progress callbacks), add it to the catllm package first, then use it in the app.
__pycache__/app.cpython-311.pyc CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
 
app.py CHANGED
@@ -547,18 +547,15 @@ def run_classify_data(input_type, input_data, description, categories,
547
  start_time = time.time()
548
 
549
  classify_kwargs = {
550
- 'input_data': input_data,
551
  'categories': categories,
552
- 'api_key': actual_api_key,
553
- 'input_type': input_type,
554
- 'description': description,
555
- 'user_model': model,
556
- 'model_source': model_source
557
  }
558
  if mode:
559
- classify_kwargs['mode'] = mode
560
 
561
- result = catllm.classify(**classify_kwargs)
562
 
563
  processing_time = time.time() - start_time
564
  num_items = len(result)
@@ -1061,15 +1058,12 @@ with col_input:
1061
  status_text.text(f"Processing page {current_idx+1} of {total_pages} ({page_label}) ({progress*100:.0f}%){eta_str}")
1062
 
1063
  try:
1064
- result_df = catllm.classify(
1065
- input_data=items_list,
1066
  categories=categories_entered,
1067
- api_key=actual_api_key,
1068
- input_type="pdf",
1069
- description=description,
1070
- user_model=model,
1071
- model_source=model_source,
1072
- mode=mode,
1073
  progress_callback=pdf_progress_callback
1074
  )
1075
 
@@ -1100,46 +1094,27 @@ with col_input:
1100
  all_results = []
1101
 
1102
  else:
1103
- # Non-PDF processing (text, images) - item by item
1104
- all_results = []
1105
  total_items = len(items_list)
 
1106
 
1107
- for i, item in enumerate(items_list):
1108
- progress = i / total_items if total_items > 0 else 0
1109
- progress_bar.progress(min(progress, 1.0))
 
 
 
 
 
1110
 
1111
- elapsed = time.time() - start_time
1112
- if i > 0:
1113
- avg_time = elapsed / i
1114
- eta_seconds = avg_time * (total_items - i)
1115
- eta_str = f" | ETA: {eta_seconds:.0f}s" if eta_seconds < 60 else f" | ETA: {eta_seconds/60:.1f}m"
1116
- else:
1117
- eta_str = ""
1118
 
1119
- status_text.text(f"Processing item {i+1} of {total_items} ({progress*100:.0f}%){eta_str}")
1120
-
1121
- try:
1122
- item_result = catllm.classify(
1123
- input_data=[item],
1124
- categories=categories_entered,
1125
- api_key=actual_api_key,
1126
- input_type=input_type_selected,
1127
- description=description,
1128
- user_model=model,
1129
- model_source=model_source
1130
- )
1131
- all_results.append(item_result)
1132
-
1133
- progress = (i + 1) / total_items if total_items > 0 else 1.0
1134
- progress_bar.progress(min(progress, 1.0))
1135
-
1136
- except Exception as e:
1137
- st.warning(f"Error on item {i+1}: {str(e)}")
1138
- continue
1139
-
1140
- processing_time = time.time() - start_time
1141
- progress_bar.progress(1.0)
1142
- status_text.text(f"Completed {total_items} items in {processing_time:.1f}s")
1143
 
1144
  if all_results:
1145
  # Combine results
 
547
  start_time = time.time()
548
 
549
  classify_kwargs = {
550
+ 'survey_input': input_data,
551
  'categories': categories,
552
+ 'models': [(model, model_source, actual_api_key)],
553
+ 'input_description': description,
 
 
 
554
  }
555
  if mode:
556
+ classify_kwargs['pdf_mode'] = mode
557
 
558
+ result = catllm.multi_class_ensemble(**classify_kwargs)
559
 
560
  processing_time = time.time() - start_time
561
  num_items = len(result)
 
1058
  status_text.text(f"Processing page {current_idx+1} of {total_pages} ({page_label}) ({progress*100:.0f}%){eta_str}")
1059
 
1060
  try:
1061
+ result_df = catllm.multi_class_ensemble(
1062
+ survey_input=items_list,
1063
  categories=categories_entered,
1064
+ models=[(model, model_source, actual_api_key)],
1065
+ input_description=description,
1066
+ pdf_mode=mode,
 
 
 
1067
  progress_callback=pdf_progress_callback
1068
  )
1069
 
 
1094
  all_results = []
1095
 
1096
  else:
1097
+ # Non-PDF processing (text, images) - process all at once
 
1098
  total_items = len(items_list)
1099
+ status_text.text(f"Processing {total_items} items...")
1100
 
1101
+ try:
1102
+ result_df = catllm.multi_class_ensemble(
1103
+ survey_input=items_list,
1104
+ categories=categories_entered,
1105
+ models=[(model, model_source, actual_api_key)],
1106
+ input_description=description,
1107
+ )
1108
+ all_results = [result_df]
1109
 
1110
+ processing_time = time.time() - start_time
1111
+ progress_bar.progress(1.0)
1112
+ status_text.text(f"Completed {total_items} items in {processing_time:.1f}s")
 
 
 
 
1113
 
1114
+ except Exception as e:
1115
+ st.error(f"Error: {str(e)}")
1116
+ all_results = []
1117
+ processing_time = time.time() - start_time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1118
 
1119
  if all_results:
1120
  # Combine results
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  streamlit>=1.32.0
2
- cat-llm[pdf]>=0.1.8
3
  mistralai
4
  pydantic==2.10.6
5
  huggingface_hub<0.27.0
 
1
  streamlit>=1.32.0
2
+ cat-llm[pdf]>=0.1.9
3
  mistralai
4
  pydantic==2.10.6
5
  huggingface_hub<0.27.0