minh9972t12 committed on
Commit
92405b2
·
verified ·
1 Parent(s): fdc8d37

Create event_tags_generator.py

Browse files
Files changed (1) hide show
  1. event_tags_generator.py +430 -0
event_tags_generator.py ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Event Tags Generator - AI Chatbot for automatic tag generation
3
+ Generates relevant tags, keywords, and categories from event information
4
+ """
5
+
6
+ from fastapi import FastAPI, HTTPException
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from pydantic import BaseModel
9
+ from typing import Optional, List
10
+ from datetime import datetime
11
+ import os
12
+ from huggingface_hub import InferenceClient
13
+
14
# Initialize FastAPI
app = FastAPI(
    title="Event Tags Generator API",
    description="AI-powered automatic tag generation for events using LLM",
    version="1.0.0",
)

# CORS middleware.
# Allowed origins are read from the comma-separated ALLOWED_ORIGINS environment
# variable and default to "*" (any origin), which was the previous behavior.
allowed_origins = [
    origin.strip()
    for origin in os.getenv("ALLOWED_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    # A wildcard origin must not be combined with credentialed requests: the
    # CORS spec forbids it and it would let any site send cookies to this API.
    # Credentials are therefore only enabled for an explicit origin list.
    allow_credentials="*" not in allowed_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Hugging Face token used when a request does not carry its own token
hf_token = os.getenv("HUGGINGFACE_TOKEN")
if hf_token:
    print("✓ Hugging Face token configured")
else:
    print("⚠ Warning: No HUGGINGFACE_TOKEN found. Set it in environment variable.")
36
+
37
+
38
+ # Pydantic models
39
class EventTagsRequest(BaseModel):
    """Request body describing one event for which tags should be generated."""

    # --- Required event information -------------------------------------
    event_name: str
    category: str
    short_description: str
    detailed_description: str

    # --- Optional generation settings -----------------------------------
    # Upper bound on the number of tags returned
    max_tags: Optional[int] = 10
    # Output language: "vi" = Vietnamese, "en" = English
    language: Optional[str] = "vi"
    # Per-request Hugging Face token; when omitted the endpoint falls back
    # to the module-level token read from the environment
    hf_token: Optional[str] = None
47
+
48
+
49
class EventTagsResponse(BaseModel):
    """Structured metadata returned for a single event."""

    # Name of the event, echoed back from the request
    event_name: str

    # --- Metadata parsed from the LLM output ----------------------------
    generated_tags: List[str]
    primary_category: str
    secondary_categories: List[str]
    keywords: List[str]
    hashtags: List[str]
    target_audience: List[str]
    sentiment: str

    # --- Information about the generation run ---------------------------
    # Heuristic score based on which metadata fields were filled in
    confidence_score: float
    # Elapsed time formatted as a string with an "s" suffix
    generation_time: str
    # Short name of the model that produced the answer
    model_used: str
61
+
62
+
63
@app.get("/")
async def root():
    """Return service status and a self-describing summary of the API.

    The summary lists every endpoint exposed by this module, including the
    batch endpoint, together with the request and response fields.
    """
    single_endpoint = {
        "description": "Generate tags from event information",
        "request_body": {
            "event_name": "string - Tên sự kiện",
            "category": "string - Danh mục (âm nhạc, thể thao, công nghệ...)",
            "short_description": "string - Mô tả ngắn (1-2 câu)",
            "detailed_description": "string - Mô tả chi tiết",
            "max_tags": "integer (optional, default: 10) - Số lượng tags tối đa",
            "language": "string (optional, default: 'vi') - Ngôn ngữ output",
            "hf_token": "string (optional) - Hugging Face token"
        },
        "response": {
            "generated_tags": "array - Danh sách tags",
            "primary_category": "string - Danh mục chính",
            "secondary_categories": "array - Danh mục phụ",
            "keywords": "array - Keywords SEO",
            "hashtags": "array - Social media hashtags",
            "target_audience": "array - Đối tượng mục tiêu",
            "sentiment": "string - Cảm xúc (positive/neutral/negative)",
            "confidence_score": "float - Độ tin cậy (0-1)",
            # Fields below are returned by the endpoint but were previously
            # missing from this description.
            "event_name": "string - Tên sự kiện",
            "generation_time": "string - Thời gian xử lý (ví dụ: '1.23s')",
            "model_used": "string - Tên model đã sử dụng"
        },
        "example": {
            "request": {
                "event_name": "Vietnam Music Festival 2025",
                "category": "Âm nhạc",
                "short_description": "Lễ hội âm nhạc quốc tế lớn nhất Việt Nam",
                "detailed_description": "Sự kiện quy tụ các nghệ sĩ nổi tiếng trong nước và quốc tế..."
            },
            "response": {
                "generated_tags": ["âm nhạc", "festival", "concert", "việt nam", "quốc tế"],
                "hashtags": ["#VietnamMusicFest", "#MusicFestival2025", "#LiveMusic"]
            }
        }
    }

    # The batch endpoint was not advertised before although it is exposed.
    batch_endpoint = {
        "description": "Generate tags for multiple events in one request",
        "request_body": "array - List of objects with the same fields as POST /generate-tags",
        "response": {
            "total": "integer - Number of events received",
            "successful": "integer - Number of events processed successfully",
            "failed": "integer - Number of events that failed",
            "results": "array - Per-event result (event_name, success, data or error)"
        }
    }

    return {
        "status": "running",
        "service": "Event Tags Generator API",
        "version": "1.0.0",
        "description": "Generate tags, keywords, categories automatically from event info",
        "endpoints": {
            "POST /generate-tags": single_endpoint,
            "POST /generate-tags/batch": batch_endpoint
        },
        "usage": "POST /generate-tags with event information in JSON body"
    }
109
+
110
+
111
def build_powerful_prompt(
    event_name: str,
    category: str,
    short_desc: str,
    detailed_desc: str,
    max_tags: int,
    language: str
) -> str:
    """
    Assemble the instruction prompt sent to the LLM for one event.

    The event fields and the tag limit are interpolated into a fixed
    template. The output language is Vietnamese when ``language`` is "vi"
    and English for any other value.
    """
    if language == "vi":
        lang_instruction = "in Vietnamese"
    else:
        lang_instruction = "in English"

    # One entry per prompt line; empty strings are the blank separator lines.
    prompt_lines = [
        "You are an expert AI system specialized in event marketing, SEO, and content categorization. Your task is to analyze event information and generate comprehensive, relevant tags and metadata.",
        "",
        "**EVENT INFORMATION:**",
        f"• Event Name: {event_name}",
        f"• Primary Category: {category}",
        f"• Short Description: {short_desc}",
        f"• Detailed Description: {detailed_desc}",
        "",
        "**YOUR TASK:**",
        f"Analyze the event information above and generate the following {lang_instruction}:",
        "",
        f"1. **TAGS** ({max_tags} tags maximum):",
        "- Generate specific, relevant, searchable tags",
        "- Include event type, theme, activities, location references",
        "- Mix broad and specific tags for better discoverability",
        "- Use lowercase, single words or short phrases",
        "- Example format: âm nhạc, festival, concert, outdoor, hà nội",
        "",
        "2. **PRIMARY CATEGORY** (1 category):",
        "- The main category that best describes this event",
        "- Choose from: Âm nhạc, Thể thao, Công nghệ, Nghệ thuật, Ẩm thực, Giáo dục, Kinh doanh, Du lịch, Giải trí, Khác",
        "",
        "3. **SECONDARY CATEGORIES** (2-3 categories):",
        "- Additional relevant categories",
        "- Help with cross-categorization",
        "",
        "4. **KEYWORDS** (5-8 keywords):",
        "- SEO-optimized keywords for search engines",
        "- Include long-tail keywords",
        "- Example: \"lễ hội âm nhạc hà nội\", \"concert quốc tế việt nam\"",
        "",
        "5. **HASHTAGS** (5-7 hashtags):",
        "- Social media friendly hashtags",
        "- Mix of popular and unique hashtags",
        "- Example: #VietnamMusicFest, #LiveMusic, #HanoiEvents",
        "",
        "6. **TARGET AUDIENCE** (2-4 audience groups):",
        "- Who would be interested in this event?",
        "- Example: Giới trẻ, Gia đình, Dân văn phòng, Sinh viên",
        "",
        "7. **SENTIMENT** (one word):",
        "- Overall emotion/feeling: positive, neutral, or negative",
        "- Based on event description tone",
        "",
        "**OUTPUT FORMAT (JSON-like structure):**",
        "TAGS: tag1, tag2, tag3, ...",
        "PRIMARY_CATEGORY: category_name",
        "SECONDARY_CATEGORIES: cat1, cat2, cat3",
        "KEYWORDS: keyword1, keyword2, keyword3, ...",
        "HASHTAGS: #tag1, #tag2, #tag3, ...",
        "TARGET_AUDIENCE: audience1, audience2, audience3",
        "SENTIMENT: positive/neutral/negative",
        "",
        "**IMPORTANT GUIDELINES:**",
        "- Be specific and relevant to the event",
        "- Use terms people would actually search for",
        "- Balance between popular and niche terms",
        "- Consider SEO and social media best practices",
        "- Keep tags concise and meaningful",
        f"- Generate output {lang_instruction}",
        "",
        "Now, analyze the event and generate the metadata:",
    ]

    return "\n".join(prompt_lines)
189
+
190
+
191
def parse_llm_response(response_text: str, max_tags: int) -> dict:
    """
    Parse the free-text LLM answer into a structured metadata dict.

    The answer is scanned line by line for ``FIELD: value`` pairs. Field
    names are matched case-insensitively and may be decorated the way chat
    models commonly do it: markdown bold (``**TAGS:**``), bullets, list
    numbering (``2. PRIMARY CATEGORY:``) or spaces instead of underscores.
    List values are split on commas, and surrounding quotes, backticks,
    asterisks and brackets are removed from every item.

    Args:
        response_text: Raw text returned by the model.
        max_tags: Maximum number of tags to keep. ``None`` keeps all tags,
            negative values are treated as 0.

    Returns:
        dict with the keys ``generated_tags``, ``primary_category``,
        ``secondary_categories``, ``keywords``, ``hashtags``,
        ``target_audience`` and ``sentiment``. Fields that are missing from
        the answer keep their defaults (empty list / empty string /
        ``"neutral"``).
    """
    import re  # local import so the function does not depend on file-level imports

    result = {
        "generated_tags": [],
        "primary_category": "",
        "secondary_categories": [],
        "keywords": [],
        "hashtags": [],
        "target_audience": [],
        "sentiment": "neutral",
    }

    if not response_text:
        return result

    # Optional leading decoration, the field name, optional bold markers
    # around the colon, then the value.
    field_pattern = re.compile(r"^[^A-Za-z]*([A-Za-z][A-Za-z_ ]*?)[\s*]*:[\s*]*(.*)$")
    wrapper_chars = "\"'`*[]"
    valid_sentiments = {"positive", "neutral", "negative"}

    # Fields that are plain comma-separated lists without extra processing
    plain_list_fields = {
        "SECONDARY_CATEGORIES": "secondary_categories",
        "KEYWORDS": "keywords",
        "TARGET_AUDIENCE": "target_audience",
    }

    def split_items(text):
        """Split a comma-separated value into cleaned, non-empty items."""
        items = []
        for piece in text.split(","):
            cleaned = piece.strip().strip(wrapper_chars).strip()
            if cleaned:
                items.append(cleaned)
        return items

    for raw_line in response_text.strip().split("\n"):
        match = field_pattern.match(raw_line.strip())
        if match is None:
            continue

        key = re.sub(r"[\s_]+", "_", match.group(1).strip()).upper()
        value = match.group(2).strip()

        if key == "TAGS":
            # Lowercase, then drop duplicates while keeping the model's order
            tags = list(dict.fromkeys(item.lower() for item in split_items(value)))
            if tags:
                # A negative limit would otherwise cut tags off the END of the list
                limit = None if max_tags is None else max(max_tags, 0)
                result["generated_tags"] = tags[:limit]

        elif key == "PRIMARY_CATEGORY":
            category = value.strip(wrapper_chars).strip()
            if category:
                result["primary_category"] = category

        elif key in plain_list_fields:
            items = split_items(value)
            # Do not let an empty repeated line wipe an already parsed field
            if items:
                result[plain_list_fields[key]] = items

        elif key == "HASHTAGS":
            hashtags = []
            for item in split_items(value):
                body = item.lstrip("#").strip()
                if body:
                    # Ensure exactly one leading '#'
                    hashtags.append(f"#{body}")
            if hashtags:
                result["hashtags"] = hashtags

        elif key == "SENTIMENT":
            # Accept "Positive.", "positive (upbeat)" etc., but ignore an
            # echoed template such as "positive/neutral/negative".
            found = {
                word
                for word in re.findall(r"[a-z]+", value.lower())
                if word in valid_sentiments
            }
            if len(found) == 1:
                result["sentiment"] = found.pop()

    return result
253
+
254
+
255
@app.post("/generate-tags", response_model=EventTagsResponse)
async def generate_tags(request: EventTagsRequest):
    """
    Generate comprehensive tags and metadata for an event

    This endpoint uses advanced LLM prompting to generate:
    - Relevant tags for searchability
    - Category classification
    - SEO keywords
    - Social media hashtags
    - Target audience identification
    - Sentiment analysis

    **Input:**
    - event_name: Name of the event
    - category: Primary category (music, sports, tech, etc.)
    - short_description: Brief 1-2 sentence description
    - detailed_description: Full event description with details

    **Output:**
    - Structured metadata ready for use in event management system
    - All fields optimized for search and discovery

    **Errors:**
    - 401 when no Hugging Face token is available
    - 500 when every model fails or any other error occurs
    """
    # Local imports keep this handler independent of the file-level import list
    import asyncio
    import functools
    import time

    try:
        # Monotonic clock: measures elapsed time without wall-clock jumps
        started = time.perf_counter()

        # Get token: a per-request token takes precedence over the environment one
        token = request.hf_token or hf_token

        if not token:
            raise HTTPException(
                status_code=401,
                detail="HUGGINGFACE_TOKEN required. Set environment variable or pass in request body."
            )

        # Both fields are Optional, so an explicit null must fall back to the
        # documented defaults instead of leaking "None" into the prompt.
        max_tags = 10 if request.max_tags is None else max(request.max_tags, 0)
        language = request.language or "vi"

        # Build powerful prompt
        prompt = build_powerful_prompt(
            event_name=request.event_name,
            category=request.category,
            short_desc=request.short_description,
            detailed_desc=request.detailed_description,
            max_tags=max_tags,
            language=language
        )

        # Initialize HF client
        client = InferenceClient(token=token)

        # Models are tried in order; the first usable answer wins
        models_to_try = [
            "microsoft/Phi-3-mini-4k-instruct",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "HuggingFaceH4/zephyr-7b-beta",
            "meta-llama/Llama-3.2-3B-Instruct"
        ]

        min_response_chars = 50
        llm_response = ""
        model_used = ""
        last_error = None
        loop = asyncio.get_running_loop()

        for model_name in models_to_try:
            try:
                print(f"Trying model: {model_name}")

                # text_generation is a blocking network call; run it in the
                # default executor so the event loop keeps serving requests.
                candidate = await loop.run_in_executor(
                    None,
                    functools.partial(
                        client.text_generation,
                        prompt,
                        model=model_name,
                        max_new_tokens=800,
                        temperature=0.7,
                        top_p=0.9,
                        do_sample=True,
                        return_full_text=False
                    )
                )

                if candidate and len(candidate.strip()) > min_response_chars:
                    llm_response = candidate
                    model_used = model_name
                    print(f"✓ Success with {model_name}")
                    break

                # Record why this model was rejected so the final error
                # message is never "Last error: None".
                last_error = f"{model_name} returned an empty or too short response"
                print(f"✗ Failed with {model_name}: {last_error}")

            except Exception as model_error:
                print(f"✗ Failed with {model_name}: {str(model_error)}")
                last_error = model_error
                continue

        # llm_response is only set for an accepted answer, so this check and
        # the acceptance test inside the loop can no longer disagree.
        if not llm_response:
            raise HTTPException(
                status_code=500,
                detail=f"All models failed. Last error: {str(last_error)}\n\nPlease check:\n1. Token has correct permissions\n2. Token is valid and not expired\n3. Try regenerating token"
            )

        # Parse LLM response into structured format
        parsed_result = parse_llm_response(llm_response, max_tags)

        # Calculate confidence score (basic heuristic): each populated field
        # contributes a fixed weight, summing to 1.0 when all are present
        confidence = 0.0
        if parsed_result["generated_tags"]:
            confidence += 0.3
        if parsed_result["primary_category"]:
            confidence += 0.2
        if parsed_result["keywords"]:
            confidence += 0.2
        if parsed_result["hashtags"]:
            confidence += 0.15
        if parsed_result["target_audience"]:
            confidence += 0.15

        generation_time = time.perf_counter() - started

        # Build response
        return EventTagsResponse(
            event_name=request.event_name,
            generated_tags=parsed_result["generated_tags"],
            primary_category=parsed_result["primary_category"],
            secondary_categories=parsed_result["secondary_categories"],
            keywords=parsed_result["keywords"],
            hashtags=parsed_result["hashtags"],
            target_audience=parsed_result["target_audience"],
            sentiment=parsed_result["sentiment"],
            confidence_score=round(confidence, 2),
            generation_time=f"{generation_time:.2f}s",
            # Report only the model name without the organisation prefix
            model_used=model_used.split('/')[-1] if model_used else "unknown"
        )

    except HTTPException:
        # Already a well-formed API error: pass it through unchanged
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error generating tags: {str(e)}"
        ) from e
389
+
390
+
391
@app.post("/generate-tags/batch")
async def generate_tags_batch(events: List[EventTagsRequest]):
    """
    Batch generate tags for multiple events

    Useful for bulk processing or migrating existing events.

    Events are processed one after another. A failure of one event does not
    abort the batch: it is reported in that event's result entry, with the
    error message and, for API errors, the HTTP status code.

    Returns a dict with the counters ``total``, ``successful`` and
    ``failed`` plus the per-event ``results`` list.
    """
    results = []

    for event in events:
        try:
            data = await generate_tags(event)
        except HTTPException as http_error:
            # str() of an HTTPException is empty or differently formatted
            # depending on the framework version, so read the fields directly.
            results.append({
                "event_name": event.event_name,
                "success": False,
                "status_code": http_error.status_code,
                "error": str(http_error.detail)
            })
        except Exception as e:
            results.append({
                "event_name": event.event_name,
                "success": False,
                "error": str(e)
            })
        else:
            results.append({
                "event_name": event.event_name,
                "success": True,
                "data": data
            })

    successful = sum(1 for r in results if r["success"])

    return {
        "total": len(events),
        "successful": successful,
        "failed": len(results) - successful,
        "results": results
    }
421
+
422
+
423
if __name__ == "__main__":
    # Start a standalone server when this module is executed as a script
    import uvicorn

    server_options = {
        "host": "0.0.0.0",
        "port": 8001,  # Different port from main API
        "log_level": "info",
    }
    uvicorn.run(app, **server_options)