chrissoria committed on
Commit
4ec96e7
·
verified ·
1 Parent(s): c04a288

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +43 -40
  2. requirements.txt +1 -1
app.py CHANGED
@@ -259,78 +259,77 @@ print(result["counts_df"])
259
  '''
260
 
261
 
262
- def generate_classify_code(input_type, description, categories, model, model_source, mode=None):
263
  """Generate Python code for classification."""
264
  categories_str = ",\n ".join([f'"{cat}"' for cat in categories])
265
 
 
266
  if input_type == "text":
267
- return f'''import catllm
268
- import pandas as pd
269
 
270
  # Load your data
271
  df = pd.read_csv("your_data.csv")
 
 
 
 
 
 
 
272
 
 
 
 
 
 
 
273
  # Define categories
274
  categories = [
275
  {categories_str}
276
  ]
277
 
278
- # Classify the text data
279
  result = catllm.classify(
280
- input_data=df["{description}"].tolist(),
281
  categories=categories,
282
  api_key="YOUR_API_KEY",
283
- input_type="text",
284
  description="{description}",
285
- user_model="{model}",
286
- model_source="{model_source}"
287
  )
288
 
289
  # View results
290
  print(result)
291
  result.to_csv("classified_results.csv", index=False)
292
  '''
293
- elif input_type == "pdf":
294
- mode_line = f',\n mode="{mode}"' if mode else ''
295
- return f'''import catllm
296
-
297
- # Define categories
298
- categories = [
299
- {categories_str}
300
- ]
301
 
302
- # Classify PDF documents
303
- result = catllm.classify(
304
- input_data="path/to/your/pdfs/",
305
- categories=categories,
306
- api_key="YOUR_API_KEY",
307
- input_type="pdf",
308
- description="{description}"{mode_line},
309
- user_model="{model}",
310
- model_source="{model_source}"
311
- )
312
 
313
- # View results
314
- print(result)
315
- result.to_csv("classified_results.csv", index=False)
316
- '''
317
- else: # image
318
  return f'''import catllm
319
-
320
  # Define categories
321
  categories = [
322
  {categories_str}
323
  ]
324
 
325
- # Classify images
 
 
 
 
 
326
  result = catllm.classify(
327
- input_data="path/to/your/images/",
328
  categories=categories,
329
- api_key="YOUR_API_KEY",
330
- input_type="image",
331
- description="{description}",
332
- user_model="{model}",
333
- model_source="{model_source}"
334
  )
335
 
336
  # View results
@@ -1344,7 +1343,11 @@ with col_input:
1344
  )
1345
 
1346
  # Generate code
1347
- code = generate_classify_code(input_type_selected, description, categories_entered, report_model, report_model_source, mode)
 
 
 
 
1348
 
1349
  st.session_state.results = {
1350
  'df': result_df,
 
259
  '''
260
 
261
 
262
+ def generate_classify_code(input_type, description, categories, model, model_source, mode=None, classify_mode="Single Model", models_list=None):
263
  """Generate Python code for classification."""
264
  categories_str = ",\n ".join([f'"{cat}"' for cat in categories])
265
 
266
+ # Determine input data placeholder based on type
267
  if input_type == "text":
268
+ input_placeholder = 'df["your_column"].tolist()'
269
+ load_data = '''import pandas as pd
270
 
271
  # Load your data
272
  df = pd.read_csv("your_data.csv")
273
+ '''
274
+ elif input_type == "pdf":
275
+ input_placeholder = '"path/to/your/pdfs/"'
276
+ load_data = ''
277
+ else: # image
278
+ input_placeholder = '"path/to/your/images/"'
279
+ load_data = ''
280
 
281
+ # Generate code based on classification mode
282
+ if classify_mode == "Single Model":
283
+ # Single model mode
284
+ mode_param = f',\n mode="{mode}"' if mode and input_type == "pdf" else ''
285
+ return f'''import catllm
286
+ {load_data}
287
  # Define categories
288
  categories = [
289
  {categories_str}
290
  ]
291
 
292
+ # Classify data (input type is auto-detected)
293
  result = catllm.classify(
294
+ input_data={input_placeholder},
295
  categories=categories,
296
  api_key="YOUR_API_KEY",
 
297
  description="{description}",
298
+ user_model="{model}"{mode_param}
 
299
  )
300
 
301
  # View results
302
  print(result)
303
  result.to_csv("classified_results.csv", index=False)
304
  '''
305
+ else:
306
+ # Multi-model mode (Comparison or Ensemble)
307
+ if models_list:
308
+ models_str = ",\n ".join([f'("{m}", "auto", "YOUR_API_KEY")' for m in models_list])
309
+ else:
310
+ models_str = '("gpt-4o", "auto", "YOUR_API_KEY"),\n ("claude-sonnet-4-5-20250929", "auto", "YOUR_API_KEY")'
 
 
311
 
312
+ mode_param = f',\n mode="{mode}"' if mode and input_type == "pdf" else ''
313
+ consensus_param = ',\n consensus_threshold=0.5' if classify_mode == "Ensemble" else ''
 
 
 
 
 
 
 
 
314
 
 
 
 
 
 
315
  return f'''import catllm
316
+ {load_data}
317
  # Define categories
318
  categories = [
319
  {categories_str}
320
  ]
321
 
322
+ # Define models for {"ensemble voting" if classify_mode == "Ensemble" else "comparison"}
323
+ models = [
324
+ {models_str}
325
+ ]
326
+
327
+ # Classify with multiple models
328
  result = catllm.classify(
329
+ input_data={input_placeholder},
330
  categories=categories,
331
+ models=models,
332
+ description="{description}"{mode_param}{consensus_param}
 
 
 
333
  )
334
 
335
  # View results
 
1343
  )
1344
 
1345
  # Generate code
1346
+ code = generate_classify_code(
1347
+ input_type_selected, description, categories_entered,
1348
+ report_model, report_model_source, mode,
1349
+ classify_mode=classify_mode, models_list=models_list
1350
+ )
1351
 
1352
  st.session_state.results = {
1353
  'df': result_df,
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  streamlit>=1.32.0
2
- cat-llm[pdf]>=0.1.13
3
  mistralai
4
  pydantic==2.10.6
5
  huggingface_hub<0.27.0
 
1
  streamlit>=1.32.0
2
+ cat-llm[pdf]>=0.1.14
3
  mistralai
4
  pydantic==2.10.6
5
  huggingface_hub<0.27.0