piyushdev committed on
Commit
9c7c2f9
·
verified ·
1 Parent(s): 913af7e

Updated code with cursor

Browse files
Files changed (1) hide show
  1. app.py +230 -50
app.py CHANGED
@@ -1,70 +1,250 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
 
 
 
3
 
 
 
4
 
5
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """
    Stream a chat reply for `message`, yielding the accumulated text so far.

    The conversation sent to the model is the system message, then the prior
    `history` entries, then the new user message. Each streamed chunk's delta
    content (if any) is appended and the running reply is yielded.

    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
    """
    inference = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

    conversation = [
        {"role": "system", "content": system_message},
        *history,
        {"role": "user", "content": message},
    ]

    reply_so_far = ""
    stream = inference.chat_completion(
        conversation,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )
    for chunk in stream:
        chunk_choices = chunk.choices
        # Chunks without choices or without delta text contribute nothing.
        if len(chunk_choices) and chunk_choices[0].delta.content:
            piece = chunk_choices[0].delta.content
        else:
            piece = ""
        reply_so_far += piece
        yield reply_so_far
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
# Extra controls for the chat UI, listed in the same order as the trailing
# parameters of `respond` (system message, max tokens, temperature, top-p).
_additional_inputs = [
    gr.Textbox(label="System message", value="You are a friendly Chatbot."),
    gr.Slider(label="Max new tokens", minimum=1, maximum=2048, value=512, step=1),
    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, value=0.7, step=0.1),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
    ),
]

chatbot = gr.ChatInterface(
    fn=respond,
    type="messages",
    additional_inputs=_additional_inputs,
)

# Page layout: login button in the sidebar, chat interface in the main area.
demo = gr.Blocks()
with demo:
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()


if __name__ == "__main__":
    demo.launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ import pandas as pd
4
+ import json
5
+ import os
6
+ from datetime import datetime
7
 
8
# System prompt sent as the "system" message of every model request.
# It asks for one plain, visual, CLIP-friendly description per business
# category keyword, gives four Category/Description examples, and ends by
# requesting JSON output. process_csv_files later looks for a "Description"
# (or "description") key in that JSON.
SYSTEM_INSTRUCTIONS = """You are an expert at writing clear and visual descriptions for a business category keyword for a yellow pages or business listing website. Given a category keyword, generate a single, detailed description that defines its key visual elements, location, and context. Do not add artistic or stylistic flair. Ensure that the description is CLIP model ready and not too verbose.

Here are some examples of the correct format:

Category: "Car Rental For Self Driven"

Description: "a car available for self-drive rental, parked at a pickup spot without a chauffeur; looks travel-ready, clean, well-maintained, keys handed over to customer"

Category: "Mehandi"

Description: "Temporary henna artwork applied on hands and feet using cones; fine brown or maroon floral and paisley patterns, mandalas, and lace-like detailing, commonly seen at weddings and festivals."

Category: "Photographer"

Description: "a person actively shooting photos or posing with a camera; holding a camera to eye, adjusting lens, or directing a subject during a shoot"

Category: "Equipment"

Description: "lighting stands, softboxes, strobes, tripods, reflectors, gimbals, battery packs, memory cards arranged as gear kits"

---

Now, I will provide a new category. Output the category name and Description in json format."""
32
 
33
+
34
def process_single_category(category, client, max_tokens, temperature, top_p):
    """Generate the raw model reply for a single category keyword.

    Sends a two-message chat (the shared system prompt followed by
    ``Category: <keyword>``) through ``client.chat_completion`` as one
    non-streaming request.

    Args:
        category: Category keyword to describe.
        client: Object exposing ``chat_completion`` (an ``InferenceClient``
            in this app).
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.

    Returns:
        The content of the first choice's message, or ``""`` when the reply
        has no choices or the content is empty.
    """
    messages = [
        {"role": "system", "content": SYSTEM_INSTRUCTIONS},
        {"role": "user", "content": f"Category: {category}"},
    ]

    # With stream=False the call returns one completed response object, not an
    # iterable of chunks, so it is read directly instead of being looped over.
    completion = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=False,
        temperature=temperature,
        top_p=top_p,
    )

    choices = completion.choices
    if choices and choices[0].message.content:
        return choices[0].message.content
    return ""
55
+
56
+
57
def _extract_description(response):
    """Return the description text contained in one raw model reply.

    The reply is expected to be a JSON object, optionally wrapped in a
    Markdown code fence. If it cannot be parsed that way, the raw reply is
    returned unchanged so no model output is lost.
    """
    try:
        if "```json" in response:
            json_str = response.split("```json")[1].split("```")[0].strip()
        elif "```" in response:
            json_str = response.split("```")[1].split("```")[0].strip()
        else:
            json_str = response.strip()

        parsed = json.loads(json_str)
        return parsed.get("Description", parsed.get("description", ""))
    except (ValueError, AttributeError, TypeError):
        # ValueError covers json.JSONDecodeError; AttributeError covers valid
        # JSON that is not an object (list, string, number).
        return response


def process_csv_files(
    files,
    category_column,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
    progress=gr.Progress()
):
    """
    Process multiple CSV files and generate descriptions for category keywords.

    For each uploaded file, the unique non-null values of `category_column`
    are sent to the model one at a time, and the results are written to a new
    CSV named ``output_<input name>_<timestamp>.csv`` in the working directory.

    Args:
        files: Uploaded files; each entry is either an object whose ``name``
            attribute is the file path or a plain path string.
        category_column: Name of the CSV column holding the category keywords.
        max_tokens: Token limit forwarded to the model.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.
        hf_token: Hugging Face OAuth token of the logged-in user.
        progress: Gradio progress tracker, updated once per category.

    Returns:
        A ``(status_text, output_paths)`` tuple. ``output_paths`` is a list of
        the CSV files written, or ``None`` when nothing was produced.
    """
    if not files:
        return "Please upload at least one CSV file.", None

    if not hf_token or not hf_token.token:
        return "Please login with your Hugging Face account.", None

    client = InferenceClient(token=hf_token.token, model="meta-llama/Llama-3.3-70B-Instruct")

    output_files = []
    status_messages = []
    total_files = len(files)

    for file_idx, file in enumerate(files):
        # Accept both file-like wrappers (path in `.name`) and bare path strings.
        file_path = getattr(file, "name", file)
        file_label = os.path.basename(file_path)

        try:
            # Read CSV file
            df = pd.read_csv(file_path)
            status_messages.append(f"📄 Processing file {file_idx + 1}/{total_files}: {file_label}")

            # Check if category column exists
            if category_column not in df.columns:
                # Column labels may be non-strings (e.g. ints), so stringify before joining.
                available = ', '.join(map(str, df.columns))
                status_messages.append(f"⚠️ Warning: Column '{category_column}' not found in {file_label}. Available columns: {available}")
                continue

            # Process each distinct category once
            descriptions = []
            categories = df[category_column].dropna().unique()
            total_categories = len(categories)

            for idx, category in enumerate(categories):
                # Each file gets an equal share of the overall progress bar.
                progress((file_idx * total_categories + idx) / (total_files * total_categories),
                         desc=f"Processing category {idx + 1}/{total_categories} in file {file_idx + 1}")

                try:
                    response = process_single_category(
                        category, client, max_tokens, temperature, top_p
                    )
                except Exception as e:
                    # A failed request is recorded in the output and does not
                    # abort the rest of the file.
                    status_messages.append(f"⚠️ Error processing category '{category}': {str(e)}")
                    descriptions.append({
                        "Category": category,
                        "Description": f"Error: {str(e)}",
                        "Raw_Response": ""
                    })
                    continue

                descriptions.append({
                    "Category": category,
                    "Description": _extract_description(response),
                    "Raw_Response": response
                })

            # Create output dataframe
            output_df = pd.DataFrame(descriptions)

            # Save to file
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            base_name = os.path.splitext(file_label)[0]
            output_filename = f"output_{base_name}_{timestamp}.csv"
            output_df.to_csv(output_filename, index=False)
            output_files.append(output_filename)

            status_messages.append(f"✅ Completed: {len(descriptions)} categories processed from {file_label}")

        except Exception as e:
            # Report the failure for this file and move on to the next one.
            status_messages.append(f"❌ Error processing {file_label}: {str(e)}")

    status_text = "\n".join(status_messages)

    if output_files:
        return status_text, output_files
    return status_text + "\n\n❌ No output files generated.", None
159
+
160
+
161
+ # Create Gradio interface
162
with gr.Blocks(title="Business Category Description Generator") as demo:
    # Page header with usage instructions.
    gr.Markdown("""
    # 🏢 Business Category Description Generator

    Upload CSV files containing business category keywords, and this app will generate
    CLIP-ready visual descriptions for each category using AI.

    **Instructions:**
    1. Login with your Hugging Face account (required)
    2. Upload one or more CSV files
    3. Specify the column name that contains the category keywords
    4. Click "Process Files" to generate descriptions
    5. Download the output CSV files
    """)

    # Sidebar: login button plus the sampling controls forwarded to the model.
    with gr.Sidebar():
        gr.LoginButton()
        gr.Markdown("### Model Settings")
        max_tokens = gr.Slider(
            minimum=64,
            maximum=512,
            value=256,
            step=16,
            label="Max Tokens"
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.7,
            step=0.1,
            label="Temperature"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="Top-p"
        )

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            files_input = gr.File(
                label="Upload CSV Files",
                file_count="multiple",
                file_types=[".csv"]
            )
            category_column = gr.Textbox(
                label="Category Column Name",
                value="category",
                placeholder="Enter the name of the column containing categories"
            )
            process_btn = gr.Button("🚀 Process Files", variant="primary", size="lg")

        # Right column: status log and downloadable results.
        with gr.Column():
            status_output = gr.Textbox(
                label="Status",
                lines=10,
                interactive=False
            )
            files_output = gr.File(
                label="Download Output Files",
                file_count="multiple"
            )

    # Only real components are listed as inputs. The OAuth token is not a
    # component: Gradio supplies it to process_csv_files through that
    # function's `gr.OAuthToken` type annotation, so it must not appear here.
    process_btn.click(
        fn=process_csv_files,
        inputs=[
            files_input,
            category_column,
            max_tokens,
            temperature,
            top_p,
        ],
        outputs=[status_output, files_output]
    )

    gr.Markdown("""
    ---
    ### 📝 Output Format
    Each output CSV file will contain:
    - **Category**: The original category keyword
    - **Description**: The generated visual description
    - **Raw_Response**: The complete model response (including JSON)
    """)


if __name__ == "__main__":
    demo.launch()