minh9972t12 committed on
Commit
90fa95b
·
verified ·
1 Parent(s): 22304be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -328
app.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
- Event Tags Generator - AI Chatbot for automatic tag generation
3
- Generates relevant tags, keywords, and categories from event information
4
  """
5
 
6
  from fastapi import FastAPI, HTTPException
@@ -16,9 +16,9 @@ import uvicorn
16
 
17
  # Initialize FastAPI
18
  app = FastAPI(
19
- title="Event Tags Generator API",
20
- description="AI-powered automatic tag generation for events using LLM",
21
- version="1.0.1"
22
  )
23
 
24
  # CORS middleware
@@ -39,25 +39,21 @@ else:
39
 
40
 
41
  # Pydantic models
42
- class EventTagsRequest(BaseModel):
43
  event_name: str
44
  category: str
45
  short_description: str
46
  detailed_description: str
47
- max_tags: Optional[int] = 10
48
- language: Optional[str] = "vi" # vi = Vietnamese, en = English
49
  hf_token: Optional[str] = None
50
 
51
 
52
- class EventTagsResponse(BaseModel):
53
  event_name: str
54
- generated_tags: List[str]
55
- primary_category: str
56
- secondary_categories: List[str]
57
- keywords: List[str]
58
  hashtags: List[str]
 
59
  target_audience: List[str]
60
- sentiment: str
61
  confidence_score: float
62
  generation_time: str
63
  model_used: str
@@ -68,43 +64,30 @@ async def root():
68
  """API Information"""
69
  return {
70
  "status": "running",
71
- "service": "Event Tags Generator API",
72
- "version": "1.0.1",
73
- "description": "Generate tags, keywords, categories automatically from event info",
74
  "endpoints": {
75
- "POST /generate-tags": {
76
- "description": "Generate tags from event information",
77
  "request_body": {
78
  "event_name": "string - Tên sự kiện",
79
  "category": "string - Danh mục (âm nhạc, thể thao, công nghệ...)",
80
  "short_description": "string - Mô tả ngắn (1-2 câu)",
81
  "detailed_description": "string - Mô tả chi tiết",
82
- "max_tags": "integer (optional, default: 10) - Số lượng tags tối đa",
83
- "language": "string (optional, default: 'vi') - Ngôn ngữ output",
84
- "hf_token": "string (optional) - Hugging Face token"
85
  }
86
  }
87
- },
88
- "usage": "POST /generate-tags with event information in JSON body"
89
  }
90
 
91
 
92
- def build_powerful_prompt(
93
- event_name: str,
94
- category: str,
95
- short_desc: str,
96
- detailed_desc: str,
97
- max_tags: int,
98
- language: str
99
- ) -> str:
100
- """
101
- Build a concise, JSON-focused prompt for better parsing
102
- """
103
-
104
  lang_instruction = "tiếng Việt" if language == "vi" else "English"
105
-
106
- # Shorter, more focused prompt that demands JSON output
107
- prompt = f"""Phân tích sự kiện và tạo metadata theo format JSON bên dưới.
108
 
109
  SỰ KIỆN:
110
  Tên: {event_name}
@@ -112,334 +95,126 @@ Danh mục: {category}
112
  Mô tả ngắn: {short_desc}
113
  Mô tả chi tiết: {detailed_desc}
114
 
115
- YÊU CẦU: Tạo output dưới dạng JSON với các trường sau (sử dụng {lang_instruction}):
 
 
 
 
 
 
 
116
 
 
117
  {{
118
- "tags": ["tag1", "tag2", "tag3", ...],
119
- "primary_category": "danh mục chính",
120
- "secondary_categories": ["danh mục phụ 1", "danh mục phụ 2"],
121
- "keywords": ["keyword1", "keyword2", ...],
122
- "hashtags": ["#hashtag1", "#hashtag2", ...],
123
- "target_audience": ["đối tượng 1", "đối tượng 2"],
124
- "sentiment": "positive/neutral/negative"
125
  }}
126
-
127
- CHÚ Ý:
128
- - Tạo tối đa {max_tags} tags
129
- - Tags phải lowercase, ngắn gọn, dễ tìm kiếm
130
- - Hashtags bắt đầu bằng #
131
- - Primary_category chọn từ: Âm nhạc, Thể thao, Công nghệ, Nghệ thuật, Ẩm thực, Giáo dục, Kinh doanh, Du lịch, Giải trí
132
- - Chỉ trả về JSON, không thêm text khác
133
-
134
- JSON OUTPUT:"""
135
-
136
  return prompt
137
 
138
 
139
- def parse_llm_response(response_text: str, max_tags: int) -> dict:
140
- """
141
- Parse LLM response - handles both JSON and text formats
142
- """
143
-
144
- # Default result
145
- result = {
146
- "generated_tags": [],
147
- "primary_category": "",
148
- "secondary_categories": [],
149
- "keywords": [],
150
- "hashtags": [],
151
- "target_audience": [],
152
- "sentiment": "neutral"
153
- }
154
-
155
- # Debug: Print raw response
156
- print(f"\n{'='*60}")
157
- print(f"RAW RESPONSE FROM MODEL:")
158
- print(f"{'='*60}")
159
- print(response_text[:500]) # Print first 500 chars
160
- print(f"{'='*60}\n")
161
-
162
- # Try to extract JSON from response
163
  try:
164
- # Method 1: Try direct JSON parse
165
- try:
166
- data = json.loads(response_text)
167
- if isinstance(data, dict):
168
- result["generated_tags"] = data.get("tags", [])[:max_tags]
169
- result["primary_category"] = data.get("primary_category", "")
170
- result["secondary_categories"] = data.get("secondary_categories", [])
171
- result["keywords"] = data.get("keywords", [])
172
- result["hashtags"] = data.get("hashtags", [])
173
- result["target_audience"] = data.get("target_audience", [])
174
- result["sentiment"] = data.get("sentiment", "neutral")
175
- print("✓ Parsed using direct JSON")
176
- return result
177
- except json.JSONDecodeError:
178
- pass
179
-
180
- # Method 2: Extract JSON from text using regex
181
- json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', response_text, re.DOTALL)
182
  if json_match:
183
- try:
184
- json_str = json_match.group(0)
185
- data = json.loads(json_str)
186
- result["generated_tags"] = data.get("tags", [])[:max_tags]
187
- result["primary_category"] = data.get("primary_category", "")
188
- result["secondary_categories"] = data.get("secondary_categories", [])
189
- result["keywords"] = data.get("keywords", [])
190
- result["hashtags"] = data.get("hashtags", [])
191
- result["target_audience"] = data.get("target_audience", [])
192
- result["sentiment"] = data.get("sentiment", "neutral")
193
- print("✓ Parsed using regex JSON extraction")
194
- return result
195
- except:
196
- pass
197
-
198
- # Method 3: Parse line by line (fallback)
199
- lines = response_text.strip().split('\n')
200
- for line in lines:
201
- line = line.strip()
202
- if not line:
203
- continue
204
-
205
- # Parse TAGS
206
- if 'tags' in line.lower() and ':' in line:
207
- # Extract array content
208
- match = re.search(r'\[(.*?)\]', line)
209
- if match:
210
- tags_str = match.group(1)
211
- tags = [t.strip().strip('"\'').lower() for t in tags_str.split(',') if t.strip()]
212
- result["generated_tags"] = tags[:max_tags]
213
-
214
- # Parse PRIMARY_CATEGORY
215
- elif 'primary_category' in line.lower() and ':' in line:
216
- value = line.split(':', 1)[1].strip().strip(',"\'')
217
- result["primary_category"] = value
218
-
219
- # Parse SECONDARY_CATEGORIES
220
- elif 'secondary_categories' in line.lower() and ':' in line:
221
- match = re.search(r'\[(.*?)\]', line)
222
- if match:
223
- cats_str = match.group(1)
224
- result["secondary_categories"] = [c.strip().strip('"\'') for c in cats_str.split(',') if c.strip()]
225
-
226
- # Parse KEYWORDS
227
- elif 'keywords' in line.lower() and ':' in line:
228
- match = re.search(r'\[(.*?)\]', line)
229
- if match:
230
- kw_str = match.group(1)
231
- result["keywords"] = [k.strip().strip('"\'') for k in kw_str.split(',') if k.strip()]
232
-
233
- # Parse HASHTAGS
234
- elif 'hashtags' in line.lower() and ':' in line:
235
- match = re.search(r'\[(.*?)\]', line)
236
- if match:
237
- ht_str = match.group(1)
238
- hashtags = [h.strip().strip('"\'') for h in ht_str.split(',') if h.strip()]
239
- result["hashtags"] = [h if h.startswith('#') else f"#{h}" for h in hashtags]
240
-
241
- # Parse TARGET_AUDIENCE
242
- elif 'target_audience' in line.lower() and ':' in line:
243
- match = re.search(r'\[(.*?)\]', line)
244
- if match:
245
- aud_str = match.group(1)
246
- result["target_audience"] = [a.strip().strip('"\'') for a in aud_str.split(',') if a.strip()]
247
-
248
- # Parse SENTIMENT
249
- elif 'sentiment' in line.lower() and ':' in line:
250
- sentiment = line.split(':', 1)[1].strip().strip(',"\'').lower()
251
- if sentiment in ['positive', 'neutral', 'negative']:
252
- result["sentiment"] = sentiment
253
-
254
- print("✓ Parsed using line-by-line fallback")
255
-
256
  except Exception as e:
257
  print(f"✗ Parsing error: {str(e)}")
258
-
259
  return result
260
 
261
 
262
- @app.post("/generate-tags", response_model=EventTagsResponse)
263
- async def generate_tags(request: EventTagsRequest):
264
- """
265
- Generate comprehensive tags and metadata for an event
266
- """
267
-
268
  try:
269
  start_time = datetime.utcnow()
270
-
271
- # Get token
272
  token = request.hf_token or hf_token
273
-
274
  if not token:
275
- raise HTTPException(
276
- status_code=401,
277
- detail="HUGGINGFACE_TOKEN required. Set environment variable or pass in request body."
278
- )
279
-
280
- # Build powerful prompt
281
- prompt = build_powerful_prompt(
282
- event_name=request.event_name,
283
- category=request.category,
284
- short_desc=request.short_description,
285
- detailed_desc=request.detailed_description,
286
- max_tags=request.max_tags,
287
- language=request.language
288
  )
289
-
290
- # Initialize HF client
291
  client = InferenceClient(token=token)
292
-
293
- # Try multiple models for best results
294
  models_to_try = [
295
- "mistralai/Mistral-7B-Instruct-v0.3",
296
- "microsoft/Phi-3-mini-4k-instruct",
297
- "HuggingFaceH4/zephyr-7b-beta",
298
- "meta-llama/Llama-3.2-3B-Instruct",
299
- "meta-llama/Meta-Llama-3-8B-Instruct"
300
  ]
301
-
302
  llm_response = ""
303
  model_used = ""
304
- last_error = None
305
-
306
- for model_name in models_to_try:
307
  try:
308
- print(f"Trying model: {model_name}")
309
-
310
- # Format messages
311
- messages = [
312
- {
313
- "role": "user",
314
- "content": prompt
315
- }
316
- ]
317
-
318
- # Generate with chat_completion
319
  response = client.chat_completion(
320
- messages=messages,
321
- model=model_name,
322
- max_tokens=1000, # Increased for more content
323
- temperature=0.3, # Lower temperature for more consistent output
324
- top_p=0.9
325
  )
326
-
327
- # Get response content
328
  llm_response = response.choices[0].message.content
329
-
330
- if llm_response and len(llm_response.strip()) > 20:
331
- model_used = model_name
332
- print(f"✓ Success with {model_name}")
333
  break
334
-
335
- except Exception as model_error:
336
- print(f"✗ Failed with {model_name}: {str(model_error)}")
337
- last_error = model_error
338
  continue
339
-
340
- # Check if generation succeeded
341
- if not llm_response or len(llm_response.strip()) < 20:
342
- raise HTTPException(
343
- status_code=500,
344
- detail=f"All models failed. Last error: {str(last_error)}"
345
- )
346
-
347
- # Parse LLM response
348
- parsed_result = parse_llm_response(llm_response, request.max_tags)
349
-
350
- # If parsing failed, create basic fallback tags
351
- if not parsed_result["generated_tags"]:
352
- print("⚠ Warning: No tags parsed, creating fallback tags")
353
- # Create basic tags from event info
354
- fallback_tags = []
355
- # Add category as tag
356
- if request.category:
357
- fallback_tags.append(request.category.lower())
358
- # Extract words from event name
359
- name_words = [w.lower() for w in request.event_name.split() if len(w) > 3]
360
- fallback_tags.extend(name_words[:3])
361
-
362
- parsed_result["generated_tags"] = fallback_tags[:request.max_tags]
363
- parsed_result["primary_category"] = request.category
364
- parsed_result["sentiment"] = "positive"
365
-
366
- # Calculate confidence score
367
- confidence = 0.0
368
- if parsed_result["generated_tags"]:
369
- confidence += 0.3
370
- if parsed_result["primary_category"]:
371
- confidence += 0.2
372
- if parsed_result["keywords"]:
373
- confidence += 0.2
374
- if parsed_result["hashtags"]:
375
- confidence += 0.15
376
- if parsed_result["target_audience"]:
377
- confidence += 0.15
378
-
379
  end_time = datetime.utcnow()
380
- generation_time = (end_time - start_time).total_seconds()
381
-
382
- # Build response
383
- return EventTagsResponse(
384
  event_name=request.event_name,
385
- generated_tags=parsed_result["generated_tags"],
386
- primary_category=parsed_result["primary_category"],
387
- secondary_categories=parsed_result["secondary_categories"],
388
- keywords=parsed_result["keywords"],
389
- hashtags=parsed_result["hashtags"],
390
- target_audience=parsed_result["target_audience"],
391
- sentiment=parsed_result["sentiment"],
392
  confidence_score=round(confidence, 2),
393
- generation_time=f"{generation_time:.2f}s",
394
- model_used=model_used.split('/')[-1] if model_used else "unknown"
395
  )
396
-
397
  except HTTPException:
398
  raise
399
  except Exception as e:
400
- raise HTTPException(
401
- status_code=500,
402
- detail=f"Error generating tags: {str(e)}"
403
- )
404
-
405
-
406
- @app.post("/generate-tags/batch")
407
- async def generate_tags_batch(events: List[EventTagsRequest]):
408
- """
409
- Batch generate tags for multiple events
410
- """
411
- results = []
412
-
413
- for event in events:
414
- try:
415
- result = await generate_tags(event)
416
- results.append({
417
- "event_name": event.event_name,
418
- "success": True,
419
- "data": result
420
- })
421
- except Exception as e:
422
- results.append({
423
- "event_name": event.event_name,
424
- "success": False,
425
- "error": str(e)
426
- })
427
-
428
- return {
429
- "total": len(events),
430
- "successful": sum(1 for r in results if r["success"]),
431
- "failed": sum(1 for r in results if not r["success"]),
432
- "results": results
433
- }
434
-
435
 
436
 
437
  if __name__ == "__main__":
438
- import os
439
  uvicorn.run(
440
  "app:app",
441
  host="0.0.0.0",
442
- port=int(os.environ.get("PORT", 7860)),
443
- reload=False,
444
- log_level="info"
445
- )
 
1
  """
2
+ Event Hashtag Generator - AI Chatbot for automatic hashtag generation
3
+ Generates viral hashtags, keywords, and target audience insights from event data
4
  """
5
 
6
  from fastapi import FastAPI, HTTPException
 
16
 
17
  # Initialize FastAPI
18
  app = FastAPI(
19
+ title="Event Hashtag Generator API",
20
+ description="AI-powered automatic hashtag and keyword generation for events",
21
+ version="2.0.0"
22
  )
23
 
24
  # CORS middleware
 
39
 
40
 
41
  # Pydantic models
42
+ class EventHashtagRequest(BaseModel):
43
  event_name: str
44
  category: str
45
  short_description: str
46
  detailed_description: str
47
+ max_hashtags: Optional[int] = 10
48
+ language: Optional[str] = "vi"
49
  hf_token: Optional[str] = None
50
 
51
 
52
+ class EventHashtagResponse(BaseModel):
53
  event_name: str
 
 
 
 
54
  hashtags: List[str]
55
+ keywords: List[str]
56
  target_audience: List[str]
 
57
  confidence_score: float
58
  generation_time: str
59
  model_used: str
 
64
  """API Information"""
65
  return {
66
  "status": "running",
67
+ "service": "Event Hashtag Generator API",
68
+ "version": "2.0.0",
69
+ "description": "Generate hashtags, keywords, and target audience from event info",
70
  "endpoints": {
71
+ "POST /generate-hashtags": {
72
+ "description": "Generate viral hashtags for events",
73
  "request_body": {
74
  "event_name": "string - Tên sự kiện",
75
  "category": "string - Danh mục (âm nhạc, thể thao, công nghệ...)",
76
  "short_description": "string - Mô tả ngắn (1-2 câu)",
77
  "detailed_description": "string - Mô tả chi tiết",
78
+ "max_hashtags": "integer (optional, default: 10)",
79
+ "language": "string (optional, default: 'vi')",
80
+ "hf_token": "string (optional)"
81
  }
82
  }
83
+ }
 
84
  }
85
 
86
 
87
def build_hashtag_prompt(event_name: str, category: str, short_desc: str, detailed_desc: str, max_hashtags: int, language: str) -> str:
    """Build the LLM prompt that asks only for hashtags, keywords and audience.

    Args:
        event_name: Name of the event, inserted verbatim into the prompt.
        category: Event category, inserted verbatim into the prompt.
        short_desc: Short description of the event.
        detailed_desc: Detailed description of the event.
        max_hashtags: Upper bound on the number of hashtags requested from the
            model. ``None`` or a value below 1 falls back to 10.
        language: ``"vi"`` requests Vietnamese output; any other value requests
            English. ``None`` falls back to ``"vi"``.

    Returns:
        The prompt text, ending with an instruction to reply with JSON only.
    """
    # The request model declares both fields Optional, so an explicit null can
    # reach this function. Without the guards the prompt would literally say
    # "Tạo tối đa None hashtag".
    if max_hashtags is None or max_hashtags < 1:
        max_hashtags = 10
    if language is None:
        language = "vi"

    lang_instruction = "tiếng Việt" if language == "vi" else "English"
    # Doubled braces below are literal braces in the f-string (JSON template).
    prompt = f"""Phân tích sự kiện sau và tạo ra các hashtag lan truyền mạnh mẽ, cùng với từ khóa và đối tượng mục tiêu.

SỰ KIỆN:
Tên: {event_name}
Danh mục: {category}
Mô tả ngắn: {short_desc}
Mô tả chi tiết: {detailed_desc}

YÊU CẦU:
- Tạo tối đa {max_hashtags} hashtag độc đáo, dễ nhớ, dễ viral, liên quan đến sự kiện.
- Mỗi hashtag phải bắt đầu bằng #.
- Ngôn ngữ: {lang_instruction}.
- Cung cấp thêm:
- Danh sách từ khóa (keywords) liên quan đến sự kiện.
- Danh sách đối tượng khán giả mục tiêu (target audience) phù hợp.
- Không trả lời giải thích, chỉ xuất JSON.

JSON OUTPUT:
{{
"hashtags": ["#TênSựKiện", "#Hashtag2", "#Hashtag3"],
"keywords": ["keyword1", "keyword2"],
"target_audience": ["đối tượng 1", "đối tượng 2"]
}}
CHỈ TRẢ VỀ JSON, KHÔNG THÊM TEXT KHÁC.
"""
    return prompt
116
 
117
 
118
def _coerce_str_list(value) -> list:
    """Return *value* as a list of non-empty, stripped strings."""
    if isinstance(value, str):
        # Models sometimes return a single string instead of a one-item list.
        value = [value]
    if not isinstance(value, (list, tuple)):
        return []
    return [item.strip() for item in value if isinstance(item, str) and item.strip()]


def parse_llm_response(response_text: str) -> dict:
    """Extract hashtags, keywords and target audience from a model reply.

    The reply may wrap the JSON object in prose or Markdown fences, so the
    text is scanned for the first ``{`` that starts a decodable JSON object
    containing at least one of the expected keys.

    Args:
        response_text: Raw text returned by the model (may be empty or None).

    Returns:
        A dict with the keys ``hashtags``, ``keywords`` and
        ``target_audience``. Each value is a list of strings, empty when the
        field is missing or malformed. Every hashtag starts with ``#``.
    """
    result = {"hashtags": [], "keywords": [], "target_audience": []}
    text = response_text or ""

    # raw_decode stops at the end of the first complete JSON value, so braces
    # in trailing prose cannot corrupt the match the way a greedy regex does.
    decoder = json.JSONDecoder()
    data = None
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict) and any(key in candidate for key in result):
            data = candidate
            break
        start = text.find("{", start + 1)

    if data is None:
        print(" No valid JSON found")
        return result

    tags = _coerce_str_list(data.get("hashtags"))
    result["hashtags"] = [tag if tag.startswith("#") else f"#{tag}" for tag in tags]
    result["keywords"] = _coerce_str_list(data.get("keywords"))
    result["target_audience"] = _coerce_str_list(data.get("target_audience"))
    print(" Parsed JSON successfully")
    return result
134
 
135
 
136
@app.post("/generate-hashtags", response_model=EventHashtagResponse)
async def generate_hashtags(request: EventHashtagRequest):
    """Generate viral hashtags, keywords, and target audience for an event.

    Builds a prompt from the request, asks each candidate model in turn until
    one returns a usable reply, parses that reply, and falls back to hashtags
    derived from the event name when parsing yields none.

    Args:
        request: Event details plus optional ``max_hashtags``, ``language``
            and ``hf_token`` overrides.

    Returns:
        An ``EventHashtagResponse`` with the hashtags (capped at the requested
        maximum), keywords, target audience, a coarse confidence score, the
        elapsed time and the short name of the model that answered.

    Raises:
        HTTPException: 401 when no Hugging Face token is available; 500 when
            no model returns a usable reply or an unexpected error occurs.
    """
    # Monotonic clock: only the elapsed duration is reported, and
    # datetime.utcnow() is deprecated since Python 3.12.
    from time import perf_counter

    try:
        started = perf_counter()

        token = request.hf_token or hf_token
        if not token:
            raise HTTPException(status_code=401, detail="HUGGINGFACE_TOKEN required.")

        # max_hashtags is Optional on the request model; normalise null or
        # non-positive values once so the prompt and the slices agree.
        limit = request.max_hashtags
        if limit is None or limit < 1:
            limit = 10

        prompt = build_hashtag_prompt(
            request.event_name,
            request.category,
            request.short_description,
            request.detailed_description,
            limit,
            request.language,
        )

        client = InferenceClient(token=token)
        models_to_try = [
            "KiLM-13b",
            "Viet-Mistral/Vistral-7B-Chat",
            "vilm-ai/VinaLLaMA-7B-chat",
        ]

        llm_response = ""
        model_used = ""
        for model in models_to_try:
            try:
                print(f"Trying model: {model}")
                # NOTE(review): this looks like a blocking call inside an async
                # handler; consider a worker thread or the async client.
                response = client.chat_completion(
                    model=model,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=800,
                    temperature=0.6,
                )
                candidate = response.choices[0].message.content
            except Exception as e:
                # Best effort across models: log and move on to the next one.
                print(f"✗ Failed with {model}: {e}")
                continue

            # Accept a reply only when it is long enough to be useful, so a
            # rejected short reply never leaks past the failure check below.
            if candidate and len(candidate.strip()) > 20:
                llm_response = candidate
                model_used = model
                break

        if not llm_response:
            raise HTTPException(status_code=500, detail="All models failed to respond.")

        parsed = parse_llm_response(llm_response)

        # Fallback when the model did not return any hashtag.
        if not parsed["hashtags"]:
            print("⚠ Creating fallback hashtags")
            # \w is Unicode-aware for str patterns, so accented Vietnamese
            # letters survive; an ASCII-only class would strip them.
            words = re.sub(r"[^\w\s]", "", request.event_name).split()
            parsed["hashtags"] = [f"#{w.capitalize()}" for w in words[:limit]]

        # Simple confidence: a fixed weight for each non-empty field.
        confidence = (
            0.3 * bool(parsed["hashtags"])
            + 0.3 * bool(parsed["keywords"])
            + 0.4 * bool(parsed["target_audience"])
        )

        return EventHashtagResponse(
            event_name=request.event_name,
            hashtags=parsed["hashtags"][:limit],
            keywords=parsed["keywords"],
            target_audience=parsed["target_audience"],
            confidence_score=round(confidence, 2),
            generation_time=f"{perf_counter() - started:.2f}s",
            model_used=model_used.split("/")[-1],
        )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
 
213
if __name__ == "__main__":
    # Local import keeps this entry point self-contained: the port lookup
    # below needs `os`, and no top-level import of it is visible here.
    import os

    # Serve on all interfaces; the port comes from PORT, defaulting to 7860.
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
        reload=False,
        log_level="info",
    )