limitedonly41 committed on
Commit
29c5fd9
·
verified ·
1 Parent(s): e354a1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -8
app.py CHANGED
@@ -128,17 +128,13 @@ def classify_website(url):
128
  try:
129
  # Prepare the input prompt for the model
130
  prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
131
-
132
  ### Instruction:
133
  Categorize the website into one of the 3 categories:
134
-
135
  1) OTHER
136
  2) NEWS/BLOG
137
  3) E-commerce
138
-
139
  ### Input:
140
  {translated}
141
-
142
  ### Response:"""
143
 
144
  inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
@@ -159,13 +155,35 @@ Categorize the website into one of the 3 categories:
159
  logging.exception(e)
160
  return str(e)
161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  # Create a Gradio interface
163
  iface = gr.Interface(
164
- fn=classify_website,
165
- inputs="text",
166
- outputs="text",
167
  title="Website Categorization",
168
- description="Categorize a website into one of the 3 categories: OTHER, NEWS/BLOG, or E-commerce."
169
  )
170
 
171
  # Launch the interface
 
128
  try:
129
  # Prepare the input prompt for the model
130
  prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
131
  ### Instruction:
132
  Categorize the website into one of the 3 categories:
 
133
  1) OTHER
134
  2) NEWS/BLOG
135
  3) E-commerce
 
136
  ### Input:
137
  {translated}
 
138
  ### Response:"""
139
 
140
  inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
 
155
  logging.exception(e)
156
  return str(e)
157
 
158
+
159
+ def classify_urls_from_csv(csv_file):
160
+ # Read CSV file and extract URLs from the first column
161
+ df = pd.read_csv(csv_file)
162
+ urls = df.iloc[:, 0].tolist()
163
+
164
+ # Classify each URL and store the results
165
+ predictions = []
166
+ for url in urls:
167
+ prediction = classify_website(url)
168
+ predictions.append(prediction)
169
+
170
+ # Add predictions as a new column in the dataframe
171
+ df['Prediction'] = predictions
172
+
173
+ # Save the results to a new CSV file
174
+ output_file = "predictions.csv"
175
+ df.to_csv(output_file, index=False)
176
+
177
+ return output_file
178
+
179
+
180
  # Create a Gradio interface
181
  iface = gr.Interface(
182
+ fn=classify_urls_from_csv,
183
+ inputs=gr.File(label="Upload CSV (First column must contain URLs)"),
184
+ outputs=gr.File(label="Download CSV with Predictions"),
185
  title="Website Categorization",
186
+ description="Upload a CSV file with URLs in the first column and categorize each website into one of the 3 categories: OTHER, NEWS/BLOG, or E-commerce."
187
  )
188
 
189
  # Launch the interface