chrissoria committed on
Commit
badee2f
·
0 Parent(s):

Survey Response Classifier

Browse files
Files changed (4) hide show
  1. README.md +43 -0
  2. __pycache__/app.cpython-311.pyc +0 -0
  3. app.py +272 -0
  4. requirements.txt +8 -0
README.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: catllm - Survey Response Classifier
3
+ emoji: 🏷️
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: "5.6.0"
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: Classify survey responses using LLMs
12
+ ---
13
+
14
+ # catllm - Survey Response Classifier
15
+
16
+ A web interface for the [catllm](https://github.com/chrissoria/cat-llm) Python package. Classify survey responses into custom categories using various LLM providers.
17
+
18
+ ## How to Use
19
+
20
+ 1. **Upload Your Data**: Upload a CSV or Excel file containing survey responses
21
+ 2. **Select Column**: Click **Load Columns**, then choose the column containing the text responses to classify
22
+ 3. **Define Categories**: Enter up to 10 classification categories (e.g., "Positive", "Negative", "Neutral")
23
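+ 4. **Choose a Model**: Select a model from the dropdown or type a custom model name. Free models need no API key; for paid models, enter your own key or configure one in the Space secrets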
24
+ 5. **Click Classify**: View and download results with category assignments
25
+
26
+ ## Supported Models
27
+
28
+ | Provider | Models |
29
+ |----------|--------|
30
+ | **OpenAI** | gpt-4o, gpt-4o-mini |
31
+ | **Anthropic** | claude-3-5-sonnet, claude-3-haiku |
32
+ | **Google** | gemini-1.5-pro, gemini-1.5-flash |
33
+ | **Mistral** | mistral-large-latest |
34
+
35
+ ## Privacy
36
+
37
+ Your API key is **never stored**. It is only used for the current classification request and is not logged or saved.
38
+
39
+ ## Learn More
40
+
41
+ - [catllm on PyPI](https://pypi.org/project/cat-llm/)
42
+ - [GitHub Repository](https://github.com/chrissoria/cat-llm)
43
+ - [Documentation](https://github.com/chrissoria/cat-llm#readme)
__pycache__/app.cpython-311.pyc ADDED
Binary file (32.6 kB). View file
 
app.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gradio app - Step 5g: Add actual catllm classification
3
+ """
4
+
5
+ import gradio as gr
6
+ import pandas as pd
7
+ import tempfile
8
+ import os
9
+
10
+ # Import catllm
11
+ try:
12
+ import catllm
13
+ CATLLM_AVAILABLE = True
14
+ except ImportError as e:
15
+ print(f"Warning: Could not import catllm: {e}")
16
+ CATLLM_AVAILABLE = False
17
+
18
# Number of category textboxes created, and how many are visible at start-up.
MAX_CATEGORIES = 10
INITIAL_CATEGORIES = 3

# Free models: dropdown label -> model id passed to the classifier.
HF_FREE_MODELS = {
    "Qwen/Qwen3-VL-235B-A22B-Instruct:novita (Free)": "Qwen/Qwen3-VL-235B-A22B-Instruct:novita",
    "deepseek-ai/DeepSeek-V3.1:novita (Free)": "deepseek-ai/DeepSeek-V3.1:novita",
    "meta-llama/Llama-4-Maverick-17B-128E-Instruct:groq (Free)": "meta-llama/Llama-4-Maverick-17B-128E-Instruct:groq",
}

# Dropdown entries: the free labels first (in mapping order), then paid models.
MODEL_CHOICES = list(HF_FREE_MODELS) + [
    "gpt-4o",
    "claude-sonnet-4-5-20250929",
    "gemini-2.5-flash",
]
35
+
36
+
37
def is_free_model(model):
    """Return True when *model* is a label tagged with "(Free)".

    Non-string values (for example ``None``, which a cleared dropdown may
    deliver) are reported as not free instead of raising ``TypeError``.
    """
    return isinstance(model, str) and "(Free)" in model
39
+
40
+
41
def get_model_source(model):
    """Auto-detect the provider ("model source") from a model name.

    Names carrying a HuggingFace router suffix (``:novita`` or ``:groq``)
    always resolve to ``"huggingface"``, even when the name also contains a
    vendor keyword such as "gpt" or "mistral" (for example
    ``openai/gpt-oss-120b:groq``). Otherwise the first matching vendor
    keyword decides, and every unrecognised name falls back to
    ``"huggingface"``.

    Args:
        model: Model identifier; matching is case-insensitive.

    Returns:
        One of "openai", "anthropic", "google", "mistral" or "huggingface".
    """
    model_lower = model.lower()

    # Check the router suffix first: a routed model is served through
    # HuggingFace no matter which vendor keyword its repository name contains.
    if any(marker in model_lower for marker in (":novita", ":groq")):
        return "huggingface"

    keyword_sources = (
        ("gpt", "openai"),
        ("claude", "anthropic"),
        ("gemini", "google"),
        ("mistral", "mistral"),
    )
    for keyword, source in keyword_sources:
        if keyword in model_lower:
            return source

    # Qwen, Llama, DeepSeek and any unknown name default to HuggingFace.
    return "huggingface"
56
+
57
+
58
def load_columns(file):
    """Read the uploaded spreadsheet and fill the column dropdown.

    Args:
        file: Upload value: a path string, an object whose ``.name`` is the
            path, or ``None`` when nothing has been uploaded.

    Returns:
        A ``(dropdown_update, status_message)`` pair. On success the dropdown
        lists every column and preselects the first one (or nothing when the
        file has no columns). On failure the dropdown is cleared and the
        message starts with "**Error:**".
    """
    if file is None:
        return gr.update(choices=[], value=None), "Please upload a file first"

    try:
        file_path = file if isinstance(file, str) else file.name
        # Compare the extension case-insensitively so "DATA.CSV" is parsed as
        # CSV instead of being handed to the Excel reader.
        if file_path.lower().endswith('.csv'):
            df = pd.read_csv(file_path)
        else:
            df = pd.read_excel(file_path)

        columns = df.columns.tolist()
        return (
            gr.update(choices=columns, value=columns[0] if columns else None),
            f"Loaded {len(df)} rows. Select column and click Classify."
        )
    except Exception as e:
        # Report read/parse failures in the status area instead of raising.
        return gr.update(choices=[], value=None), f"**Error:** {str(e)}"
76
+
77
+
78
def classify_data(spreadsheet_file, spreadsheet_column,
                  cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
                  model, model_source_input, api_key_input):
    """Classify one spreadsheet column into the user-defined categories.

    Args:
        spreadsheet_file: Uploaded file, as a path string or an object whose
            ``.name`` is the path.
        spreadsheet_column: Name of the column holding the text to classify.
        cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10: Category
            textbox values. Blank or ``None`` entries are dropped; the rest
            are stripped of surrounding whitespace.
        model: Selected model label (free models carry a "(Free)" tag).
        model_source_input: Provider name, or "auto" to detect it from the
            model name.
        api_key_input: Key typed by the user. Ignored for free models, which
            always use the ``HF_API_KEY`` environment variable.

    Returns:
        A ``(result, download_path, status_markdown)`` tuple. On failure the
        first two items are ``None`` and the message starts with
        "**Error:**".
    """
    if not CATLLM_AVAILABLE:
        return None, None, "**Error:** catllm package not available"

    all_cats = [cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10]
    categories = [c.strip() for c in all_cats if c and c.strip()]

    if not categories:
        return None, None, "**Error:** Please enter at least one category"

    if is_free_model(model):
        # Free models always run on the key stored in the environment.
        actual_api_key = os.environ.get("HF_API_KEY", "")
        actual_model = HF_FREE_MODELS.get(model, model.replace(" (Free)", ""))
        if not actual_api_key:
            return None, None, "**Error:** HuggingFace API key not configured in Space secrets"
    else:
        # Paid models: a key typed by the user wins over the environment.
        actual_model = model
        if api_key_input and api_key_input.strip():
            actual_api_key = api_key_input.strip()
        else:
            # Pick the environment variable from the vendor keyword in the name.
            model_lower = model.lower()
            if "gpt" in model_lower:
                actual_api_key = os.environ.get("OPENAI_API_KEY", "")
            elif "claude" in model_lower:
                actual_api_key = os.environ.get("ANTHROPIC_API_KEY", "")
            elif "gemini" in model_lower:
                actual_api_key = os.environ.get("GOOGLE_API_KEY", "")
            else:
                actual_api_key = ""

        if not actual_api_key:
            return None, None, f"**Error:** Please provide an API key for {model}"

    # Use the user-selected model source, or auto-detect when it is "auto".
    if model_source_input == "auto":
        model_source = get_model_source(actual_model)
    else:
        model_source = model_source_input

    try:
        if not spreadsheet_file:
            return None, None, "**Error:** Please upload a file"
        if not spreadsheet_column:
            return None, None, "**Error:** Please select a column to classify"

        file_path = spreadsheet_file if isinstance(spreadsheet_file, str) else spreadsheet_file.name
        # Compare the extension case-insensitively so "DATA.CSV" is parsed as
        # CSV instead of being handed to the Excel reader.
        if file_path.lower().endswith('.csv'):
            df = pd.read_csv(file_path)
        else:
            df = pd.read_excel(file_path)

        if spreadsheet_column not in df.columns:
            return None, None, f"**Error:** Column '{spreadsheet_column}' not found"

        input_data = df[spreadsheet_column].tolist()

        result = catllm.multi_class(
            survey_input=input_data,
            categories=categories,
            api_key=actual_api_key,
            user_model=actual_model,
            model_source=model_source
        )

        # Reserve a download path, then close the handle before writing to it
        # by name: writing while the handle is still open fails on platforms
        # that lock open files (e.g. Windows).
        with tempfile.NamedTemporaryFile(suffix='_classified.csv', delete=False) as f:
            download_path = f.name
        result.to_csv(download_path, index=False)

        return result, download_path, f"**Success!** Classified {len(input_data)} responses"

    except Exception as e:
        # Report any read or classification failure in the status area.
        return None, None, f"**Error:** {str(e)}"
156
+
157
+
158
def add_category_field(current_count):
    """Reveal one more category textbox, capped at MAX_CATEGORIES.

    Returns a flat list: one visibility update per category textbox, then a
    visibility update for the "add" button (hidden once the cap is reached),
    then the new visible count.
    """
    shown = min(current_count + 1, MAX_CATEGORIES)
    textbox_updates = [
        gr.update(visible=(idx < shown)) for idx in range(MAX_CATEGORIES)
    ]
    button_update = gr.update(visible=(shown < MAX_CATEGORIES))
    return [*textbox_updates, button_update, shown]
166
+
167
+
168
+ with gr.Blocks(title="catllm - Survey Response Classifier") as demo:
169
+ gr.Markdown("# catllm - Survey Response Classifier")
170
+ gr.Markdown("Classify survey responses into custom categories using LLMs.")
171
+
172
+ category_count = gr.State(value=INITIAL_CATEGORIES)
173
+
174
+ with gr.Row():
175
+ with gr.Column():
176
+ spreadsheet_file = gr.File(
177
+ label="Upload Survey Data (CSV or Excel)",
178
+ file_types=[".csv", ".xlsx", ".xls"]
179
+ )
180
+
181
+ with gr.Row():
182
+ spreadsheet_column = gr.Dropdown(
183
+ label="Column to Classify",
184
+ choices=[],
185
+ info="Select the column containing text to classify"
186
+ )
187
+ load_cols_btn = gr.Button("Load Columns", size="sm")
188
+
189
+ gr.Markdown("### Categories")
190
+ category_inputs = []
191
+ for i in range(MAX_CATEGORIES):
192
+ visible = i < INITIAL_CATEGORIES
193
+ cat_input = gr.Textbox(
194
+ label=f"Category {i+1}",
195
+ placeholder=f"e.g., {'Positive' if i==0 else 'Negative' if i==1 else 'Neutral'}",
196
+ visible=visible
197
+ )
198
+ category_inputs.append(cat_input)
199
+
200
+ add_category_btn = gr.Button("+ Add More Categories", variant="secondary", size="sm")
201
+
202
+ gr.Markdown("### Model")
203
+ model = gr.Dropdown(
204
+ choices=MODEL_CHOICES,
205
+ value="Qwen/Qwen3-VL-235B-A22B-Instruct:novita (Free)",
206
+ label="Model",
207
+ allow_custom_value=True
208
+ )
209
+
210
+ model_source = gr.Dropdown(
211
+ choices=["auto", "openai", "anthropic", "google", "mistral", "xai", "huggingface", "perplexity"],
212
+ value="auto",
213
+ label="Model Source",
214
+ info="Auto-detects from model name, or select manually. Use 'huggingface' for Qwen/Llama/DeepSeek models."
215
+ )
216
+
217
+ api_key = gr.Textbox(
218
+ label="API Key (optional)",
219
+ type="password",
220
+ placeholder="Enter your API key, or leave blank to use Space secrets",
221
+ info="For paid models, enter your key or configure in Space secrets"
222
+ )
223
+
224
+ api_key_status = gr.Markdown("**Free model selected** - no API key required!")
225
+
226
+ classify_btn = gr.Button("Classify", variant="primary")
227
+
228
+ with gr.Column():
229
+ status = gr.Markdown("Ready to classify")
230
+ results = gr.DataFrame(label="Classification Results")
231
+ download_file = gr.File(label="Download Results")
232
+
233
+ # Event handlers
234
def update_api_key_status(selected_model):
    """Return the Markdown hint describing which API key *selected_model* uses."""
    if is_free_model(selected_model):
        return "**Free model selected** - no API key required!"

    name = selected_model.lower()
    # (keyword in model name, provider shown to the user, secret name)
    provider_hints = (
        ("gpt", "OpenAI", "OPENAI_API_KEY"),
        ("claude", "Anthropic", "ANTHROPIC_API_KEY"),
        ("gemini", "Google", "GOOGLE_API_KEY"),
    )
    for keyword, provider, secret_name in provider_hints:
        if keyword in name:
            return f"**{provider} model** - using {secret_name} from secrets (or enter your own)"

    return "**Paid model** - enter your API key or configure in Space secrets"
245
+
246
+ model.change(
247
+ fn=update_api_key_status,
248
+ inputs=[model],
249
+ outputs=[api_key_status]
250
+ )
251
+
252
+ load_cols_btn.click(
253
+ fn=load_columns,
254
+ inputs=[spreadsheet_file],
255
+ outputs=[spreadsheet_column, status]
256
+ )
257
+
258
+ add_category_btn.click(
259
+ fn=add_category_field,
260
+ inputs=[category_count],
261
+ outputs=category_inputs + [add_category_btn, category_count]
262
+ )
263
+
264
+ classify_btn.click(
265
+ fn=classify_data,
266
+ inputs=[spreadsheet_file, spreadsheet_column] + category_inputs + [model, model_source, api_key],
267
+ outputs=[results, download_file, status]
268
+ )
269
+
270
+
271
if __name__ == "__main__":
    # When run as a script, serve the app on all network interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ cat-llm[pdf]>=0.1.01
2
+ gradio==5.6.0
3
+ pydantic==2.10.6
4
+ huggingface_hub<0.27.0
5
+ pandas
6
+ openpyxl
7
+ requests
8
+ regex