minh9972t12 committed on
Commit
9de7bc2
·
verified ·
1 Parent(s): a694abf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +386 -130
app.py CHANGED
@@ -1,155 +1,411 @@
1
  """
2
- Test script for Event Tags Generator API
 
3
  """
4
 
5
- import requests
6
- import json
 
 
 
 
 
7
  import uvicorn
 
 
 
 
 
 
8
 
 
 
 
 
 
 
 
 
9
 
10
- def test_generate_tags():
11
- """Test single event tag generation"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- print("=" * 60)
14
- print("Testing Event Tags Generator")
15
- print("=" * 60)
16
 
17
- # Test data
18
- event_data = {
19
- "event_name": "Vietnam Music Festival 2025",
20
- "category": "Âm nhạc",
21
- "short_description": "Lễ hội âm nhạc quốc tế lớn nhất Việt Nam năm 2025",
22
- "detailed_description": """
23
- Vietnam Music Festival 2025 là sự kiện âm nhạc đỉnh cao quy tụ các nghệ sĩ
24
- nổi tiếng trong nước quốc tế. Sự kiện diễn ra trong 3 ngày với hơn 50
25
- nghệ sĩ tham gia, từ nhạc pop, rock, EDM đến acoustic. Đặc biệt sự góp
26
- mặt của các DJ hàng đầu thế giới. Không gian festival rộng 10,000m2 tại
27
- trung tâm Nội với hệ thống âm thanh ánh sáng hiện đại. Dự kiến thu hút
28
- hơn 30,000 khán giả mỗi ngày.
29
- """,
30
- "max_tags": 12,
31
- "language": "vi"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
 
34
- print("\n📤 REQUEST:")
35
- print(json.dumps(event_data, indent=2, ensure_ascii=False))
36
 
37
- try:
38
- # Call API
39
- response = requests.post(
40
- f"{BASE_URL}/generate-tags",
41
- json=event_data,
42
- headers={"Content-Type": "application/json"}
43
- )
44
-
45
- if response.status_code == 200:
46
- result = response.json()
47
-
48
- print("\n✅ SUCCESS!")
49
- print("\n📥 RESPONSE:")
50
- print(json.dumps(result, indent=2, ensure_ascii=False))
51
-
52
- print("\n" + "=" * 60)
53
- print("GENERATED METADATA:")
54
- print("=" * 60)
55
-
56
- print(f"\n🏷️ TAGS ({len(result['generated_tags'])} tags):")
57
- for tag in result['generated_tags']:
58
- print(f" • {tag}")
59
-
60
- print(f"\n📁 PRIMARY CATEGORY: {result['primary_category']}")
61
-
62
- if result['secondary_categories']:
63
- print(f"\n📂 SECONDARY CATEGORIES:")
64
- for cat in result['secondary_categories']:
65
- print(f" • {cat}")
66
 
67
- if result['keywords']:
68
- print(f"\n🔍 SEO KEYWORDS:")
69
- for kw in result['keywords']:
70
- print(f" • {kw}")
71
-
72
- if result['hashtags']:
73
- print(f"\n#️⃣ HASHTAGS:")
74
- for ht in result['hashtags']:
75
- print(f" {ht}")
76
-
77
- if result['target_audience']:
78
- print(f"\n👥 TARGET AUDIENCE:")
79
- for aud in result['target_audience']:
80
- print(f" {aud}")
81
-
82
- print(f"\n😊 SENTIMENT: {result['sentiment']}")
83
- print(f"💯 CONFIDENCE: {result['confidence_score']}")
84
- print(f"⏱️ GENERATION TIME: {result['generation_time']}")
85
- print(f"🤖 MODEL USED: {result['model_used']}")
86
-
87
- else:
88
- print(f"\n❌ ERROR: {response.status_code}")
89
- print(response.text)
90
-
91
- except requests.exceptions.ConnectionError:
92
- print("\n❌ ERROR: Cannot connect to API")
93
- print("Make sure the server is running: python event_tags_generator.py")
94
- except Exception as e:
95
- print(f"\n❌ ERROR: {str(e)}")
 
 
 
 
 
 
 
 
 
 
96
 
97
 
98
- def test_batch_generation():
99
- """Test batch event tag generation"""
 
 
100
 
101
- print("\n\n" + "=" * 60)
102
- print("Testing Batch Tag Generation")
103
- print("=" * 60)
 
 
 
 
104
 
105
- events = [
106
- {
107
- "event_name": "Tech Summit Vietnam 2025",
108
- "category": "Công nghệ",
109
- "short_description": "Hội nghị công nghệ lớn nhất Đông Nam Á",
110
- "detailed_description": "Sự kiện quy tụ các chuyên gia AI, Blockchain, Cloud Computing từ Google, Microsoft, Amazon...",
111
- "max_tags": 10,
112
- "language": "vi"
113
- },
114
- {
115
- "event_name": "Food Festival Saigon",
116
- "category": "Ẩm thực",
117
- "short_description": "Lễ hội ẩm thực đường phố Sài Gòn",
118
- "detailed_description": "Khám phá hơn 100 món ăn đường phố đặc trưng của Sài Gòn với các đầu bếp nổi tiếng...",
119
- "max_tags": 8,
120
- "language": "vi"
121
- }
122
- ]
123
 
124
- print(f"\n📤 Generating tags for {len(events)} events...")
 
 
 
125
 
126
  try:
127
- response = requests.post(
128
- f"{BASE_URL}/generate-tags/batch",
129
- json=events,
130
- headers={"Content-Type": "application/json"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  )
132
 
133
- if response.status_code == 200:
134
- result = response.json()
135
- print(f"\n✅ Batch completed!")
136
- print(f" Total: {result['total']}")
137
- print(f" Successful: {result['successful']}")
138
- print(f" Failed: {result['failed']}")
139
-
140
- for item in result['results']:
141
- if item['success']:
142
- print(f"\n✓ {item['event_name']}")
143
- print(f" Tags: {', '.join(item['data']['generated_tags'][:5])}...")
144
- else:
145
- print(f"\n✗ {item['event_name']}")
146
- print(f" Error: {item['error']}")
147
- else:
148
- print(f"\n❌ ERROR: {response.status_code}")
149
- print(response.text)
150
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  except Exception as e:
152
- print(f"\n❌ ERROR: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
 
155
 
 
1
  """
2
+ Event Tags Generator - AI Chatbot for automatic tag generation
3
+ Generates relevant tags, keywords, and categories from event information
4
  """
5
 
6
import os
import re
import time
from datetime import datetime
from typing import Optional, List

import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from huggingface_hub import InferenceClient
from pydantic import BaseModel
14
# Initialize the FastAPI application (title/description/version feed the
# generated OpenAPI document).
app = FastAPI(
    title="Event Tags Generator API",
    description="AI-powered automatic tag generation for events using LLM",
    version="1.0.0",
)

# CORS middleware.
# The API is open to every origin. Credentials (cookies / HTTP auth) are
# deliberately NOT allowed: the CORS spec forbids combining a wildcard origin
# with credentials, and the middleware would otherwise reflect any requesting
# origin on credentialed requests. Authentication here travels in the request
# body (`hf_token`) or the server environment, so no credentials are needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Server-wide Hugging Face token; a request may override it with its own
# `hf_token` field.
hf_token = os.getenv("HUGGINGFACE_TOKEN")
if hf_token:
    print("✓ Hugging Face token configured")
else:
    print("⚠ Warning: No HUGGINGFACE_TOKEN found. Set it in environment variable.")
36
+
37
+
38
+ # Pydantic models
39
class EventTagsRequest(BaseModel):
    """Request body accepted by the tag-generation endpoints."""

    # --- required event description -------------------------------------
    event_name: str
    category: str
    short_description: str
    detailed_description: str

    # --- optional tuning --------------------------------------------------
    # Upper bound on the number of tags returned.
    max_tags: Optional[int] = 10
    # Output language code: vi = Vietnamese, en = English.
    language: Optional[str] = "vi"
    # Per-request Hugging Face token; when omitted the endpoint falls back to
    # the module-level token read from the environment.
    hf_token: Optional[str] = None
47
+
48
+
49
class EventTagsResponse(BaseModel):
    """Structured metadata returned for a single event."""

    # Echo of the event name sent in the request.
    event_name: str

    # --- values parsed from the LLM completion ---------------------------
    generated_tags: List[str]
    primary_category: str
    secondary_categories: List[str]
    keywords: List[str]
    hashtags: List[str]
    target_audience: List[str]
    sentiment: str

    # --- bookkeeping added by the endpoint ---------------------------------
    # Heuristic score built from which fields were successfully parsed.
    confidence_score: float
    # Elapsed wall time formatted as text, e.g. "1.23s".
    generation_time: str
    # Short model name (the part after the last "/"), or "unknown".
    model_used: str
61
+
62
+
63
@app.get("/")
async def root():
    """API Information"""
    # NOTE: the docstring above is kept verbatim because FastAPI publishes it
    # as this endpoint's description in the OpenAPI schema.

    # Human-readable description of every request field.
    request_fields = {
        "event_name": "string - Tên sự kiện",
        "category": "string - Danh mục (âm nhạc, thể thao, công nghệ...)",
        "short_description": "string - Mô tả ngắn (1-2 câu)",
        "detailed_description": "string - Mô tả chi tiết",
        "max_tags": "integer (optional, default: 10) - Số lượng tags tối đa",
        "language": "string (optional, default: 'vi') - Ngôn ngữ output",
        "hf_token": "string (optional) - Hugging Face token",
    }

    # Human-readable description of every response field.
    response_fields = {
        "generated_tags": "array - Danh sách tags",
        "primary_category": "string - Danh mục chính",
        "secondary_categories": "array - Danh mục phụ",
        "keywords": "array - Keywords SEO",
        "hashtags": "array - Social media hashtags",
        "target_audience": "array - Đối tượng mục tiêu",
        "sentiment": "string - Cảm xúc (positive/neutral/negative)",
        "confidence_score": "float - Độ tin cậy (0-1)",
    }

    # Abbreviated sample exchange shown to API consumers.
    sample_exchange = {
        "request": {
            "event_name": "Vietnam Music Festival 2025",
            "category": "Âm nhạc",
            "short_description": "Lễ hội âm nhạc quốc tế lớn nhất Việt Nam",
            "detailed_description": "Sự kiện quy tụ các nghệ sĩ nổi tiếng trong nước và quốc tế...",
        },
        "response": {
            "generated_tags": ["âm nhạc", "festival", "concert", "việt nam", "quốc tế"],
            "hashtags": ["#VietnamMusicFest", "#MusicFestival2025", "#LiveMusic"],
        },
    }

    generate_tags_doc = {
        "description": "Generate tags from event information",
        "request_body": request_fields,
        "response": response_fields,
        "example": sample_exchange,
    }

    return {
        "status": "running",
        "service": "Event Tags Generator API",
        "version": "1.0.0",
        "description": "Generate tags, keywords, categories automatically from event info",
        "endpoints": {"POST /generate-tags": generate_tags_doc},
        "usage": "POST /generate-tags with event information in JSON body",
    }
109
+
110
+
111
def build_powerful_prompt(
    event_name: str,
    category: str,
    short_desc: str,
    detailed_desc: str,
    max_tags: int,
    language: str
) -> str:
    """
    Build the structured instruction prompt sent to the LLM.

    Args:
        event_name: Name of the event.
        category: Category supplied by the caller.
        short_desc: Brief description of the event.
        detailed_desc: Full description of the event.
        max_tags: Maximum number of tags the model is asked to produce.
        language: Output language code. Any code whose primary subtag is
            "vi" (e.g. "vi", "VI", "vi-VN", " vi ") selects Vietnamese;
            everything else, including None, selects English.

    Returns:
        The complete prompt text. The output-format section asks for one
        `KEY: value` line per field, which is the layout the response parser
        in this file reads.
    """

    # Normalize the language code so case, surrounding whitespace and region
    # suffixes do not silently flip the output language to English.
    primary_subtag = (language or "").strip().lower().replace("_", "-").split("-")[0]
    lang_instruction = "in Vietnamese" if primary_subtag == "vi" else "in English"

    prompt = f"""You are an expert AI system specialized in event marketing, SEO, and content categorization. Your task is to analyze event information and generate comprehensive, relevant tags and metadata.
**EVENT INFORMATION:**
• Event Name: {event_name}
• Primary Category: {category}
• Short Description: {short_desc}
• Detailed Description: {detailed_desc}
**YOUR TASK:**
Analyze the event information above and generate the following {lang_instruction}:
1. **TAGS** ({max_tags} tags maximum):
- Generate specific, relevant, searchable tags
- Include event type, theme, activities, location references
- Mix broad and specific tags for better discoverability
- Use lowercase, single words or short phrases
- Example format: âm nhạc, festival, concert, outdoor, hà nội
2. **PRIMARY CATEGORY** (1 category):
- The main category that best describes this event
- Choose from: Âm nhạc, Thể thao, Công nghệ, Nghệ thuật, Ẩm thực, Giáo dục, Kinh doanh, Du lịch, Giải trí, Khác
3. **SECONDARY CATEGORIES** (2-3 categories):
- Additional relevant categories
- Help with cross-categorization
4. **KEYWORDS** (5-8 keywords):
- SEO-optimized keywords for search engines
- Include long-tail keywords
- Example: "lễ hội âm nhạc hà nội", "concert quốc tế việt nam"
5. **HASHTAGS** (5-7 hashtags):
- Social media friendly hashtags
- Mix of popular and unique hashtags
- Example: #VietnamMusicFest, #LiveMusic, #HanoiEvents
6. **TARGET AUDIENCE** (2-4 audience groups):
- Who would be interested in this event?
- Example: Giới trẻ, Gia đình, Dân văn phòng, Sinh viên
7. **SENTIMENT** (one word):
- Overall emotion/feeling: positive, neutral, or negative
- Based on event description tone
**OUTPUT FORMAT (plain text, exactly one `KEY: value` line per field, no markdown or JSON):**
TAGS: tag1, tag2, tag3, ...
PRIMARY_CATEGORY: category_name
SECONDARY_CATEGORIES: cat1, cat2, cat3
KEYWORDS: keyword1, keyword2, keyword3, ...
HASHTAGS: #tag1, #tag2, #tag3, ...
TARGET_AUDIENCE: audience1, audience2, audience3
SENTIMENT: positive/neutral/negative
**IMPORTANT GUIDELINES:**
- Be specific and relevant to the event
- Use terms people would actually search for
- Balance between popular and niche terms
- Consider SEO and social media best practices
- Keep tags concise and meaningful
- Generate output {lang_instruction}
Now, analyze the event and generate the metadata:"""

    return prompt
177
+
178
+
179
def parse_llm_response(response_text: str, max_tags: int) -> dict:
    """
    Parse the LLM completion into the structured metadata dictionary.

    The completion is scanned line by line for `LABEL: value` pairs. Labels
    are matched case-insensitively and tolerate the decoration models tend to
    add: markdown emphasis (`**TAGS:**`), list bullets or numbering
    (`1. TAGS:`, `- TAGS:`), parenthetical notes, and spaces or hyphens in
    place of underscores (`Primary Category:`).

    Args:
        response_text: Raw text returned by the model (may be empty or None).
        max_tags: Maximum number of tags to keep. None keeps all of them;
            negative values are treated as 0.

    Returns:
        dict with keys `generated_tags`, `primary_category`,
        `secondary_categories`, `keywords`, `hashtags`, `target_audience`
        (lists / string, empty when not found) and `sentiment`
        ("positive", "neutral" or "negative"; defaults to "neutral").
    """
    import re  # local import keeps this block self-contained

    result = {
        "generated_tags": [],
        "primary_category": "",
        "secondary_categories": [],
        "keywords": [],
        "hashtags": [],
        "target_audience": [],
        "sentiment": "neutral"
    }

    if not response_text:
        return result

    # Normalized label -> result key, for the comma-separated list fields.
    list_fields = {
        "TAGS": "generated_tags",
        "SECONDARY_CATEGORIES": "secondary_categories",
        "KEYWORDS": "keywords",
        "HASHTAGS": "hashtags",
        "TARGET_AUDIENCE": "target_audience",
    }
    quote_chars = "\"'“”"

    def split_items(value):
        """Split a comma-separated value, dropping quotes and '...' fillers."""
        items = []
        for piece in value.split(","):
            item = piece.strip().strip(quote_chars).strip()
            # `item.strip(".…")` is empty for placeholders such as "..."
            if item and item.strip(".…"):
                items.append(item)
        return items

    # A negative limit would otherwise slice from the END of the tag list.
    tag_limit = None if max_tags is None else max(max_tags, 0)

    for raw_line in response_text.splitlines():
        label, colon, value = raw_line.strip().partition(":")
        if not colon:
            continue

        # Reduce the label to a canonical UPPER_SNAKE key.
        label = label.replace("*", "").replace("`", "")
        label = re.sub(r"\(.*?\)", "", label)                        # "(10 tags maximum)"
        label = re.sub(r"^\s*(?:\d+[.)]|[-•>#]+)\s*", "", label)     # "1. ", "- ", "## "
        key = re.sub(r"[\s\-]+", "_", label.strip()).upper()

        # Drop emphasis markers left around the value ("** a, b", "x**").
        value = value.strip().strip("*`").strip()

        if key in list_fields:
            items = split_items(value)
            if key == "TAGS":
                # Lowercase, de-duplicate (order preserved), then cap.
                items = list(dict.fromkeys(t.lower() for t in items))[:tag_limit]
            elif key == "HASHTAGS":
                hashtags = []
                for chunk in items:
                    # "#a #b #c": several hashtags separated only by spaces.
                    parts = chunk.split() if chunk.count("#") > 1 else [chunk]
                    for part in parts:
                        if part.strip("#"):
                            hashtags.append(part if part.startswith("#") else f"#{part}")
                items = hashtags
            # An empty or duplicate header line must not wipe parsed data.
            if items:
                result[list_fields[key]] = items

        elif key == "PRIMARY_CATEGORY":
            category = value.strip(quote_chars).strip()
            if category:
                result["primary_category"] = category

        elif key == "SENTIMENT":
            found = set(re.findall(r"\b(positive|neutral|negative)\b", value.lower()))
            # Exactly one label must be present; an echoed template line
            # ("positive/neutral/negative") is ambiguous and ignored.
            if len(found) == 1:
                result["sentiment"] = found.pop()

    return result
241
 
242
 
243
# Models tried in order until one returns a usable completion.
_CANDIDATE_MODELS = (
    "microsoft/Phi-3-mini-4k-instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "HuggingFaceH4/zephyr-7b-beta",
    "meta-llama/Llama-3.2-3B-Instruct",
)

# A completion must be strictly longer than this (after stripping) to be
# accepted; anything shorter cannot contain the seven expected fields.
_MIN_RESPONSE_CHARS = 50

# Weight each parsed field contributes to the confidence score (sums to 1.0).
_CONFIDENCE_WEIGHTS = (
    ("generated_tags", 0.3),
    ("primary_category", 0.2),
    ("keywords", 0.2),
    ("hashtags", 0.15),
    ("target_audience", 0.15),
)


def _confidence_score(parsed: dict) -> float:
    """Sum the weights of every field that was parsed to a non-empty value."""
    return sum(weight for field, weight in _CONFIDENCE_WEIGHTS if parsed[field])


@app.post("/generate-tags", response_model=EventTagsResponse)
async def generate_tags(request: EventTagsRequest):
    """
    Generate comprehensive tags and metadata for an event

    This endpoint uses advanced LLM prompting to generate:
    - Relevant tags for searchability
    - Category classification
    - SEO keywords
    - Social media hashtags
    - Target audience identification
    - Sentiment analysis

    **Input:**
    - event_name: Name of the event
    - category: Primary category (music, sports, tech, etc.)
    - short_description: Brief 1-2 sentence description
    - detailed_description: Full event description with details

    **Output:**
    - Structured metadata ready for use in event management system
    - All fields optimized for search and discovery
    """
    # Developer notes (kept out of the docstring, which FastAPI publishes):
    # - Raises HTTPException 401 when no Hugging Face token is available.
    # - Raises HTTPException 500 when every candidate model fails or any
    #   unexpected error occurs.

    try:
        # Monotonic clock: unaffected by wall-clock adjustments.
        started = time.perf_counter()

        # Per-request token wins over the server-wide one.
        token = request.hf_token or hf_token

        if not token:
            raise HTTPException(
                status_code=401,
                detail="HUGGINGFACE_TOKEN required. Set environment variable or pass in request body."
            )

        # A client may send `"max_tags": null`; fall back to the documented
        # default instead of leaking "None" into the prompt.
        max_tags = request.max_tags if request.max_tags is not None else 10

        prompt = build_powerful_prompt(
            event_name=request.event_name,
            category=request.category,
            short_desc=request.short_description,
            detailed_desc=request.detailed_description,
            max_tags=max_tags,
            language=request.language
        )

        client = InferenceClient(token=token)

        llm_response = ""
        model_used = ""
        last_error = None

        for model_name in _CANDIDATE_MODELS:
            print(f"Trying model: {model_name}")
            try:
                # The client call is blocking network I/O; run it in the
                # thread pool so the event loop keeps serving other requests.
                candidate = await run_in_threadpool(
                    client.text_generation,
                    prompt,
                    model=model_name,
                    max_new_tokens=800,
                    temperature=0.7,
                    top_p=0.9,
                    do_sample=True,
                    return_full_text=False
                )
            except Exception as model_error:
                print(f"✗ Failed with {model_name}: {str(model_error)}")
                last_error = model_error
                continue

            if candidate and len(candidate.strip()) > _MIN_RESPONSE_CHARS:
                # Only an accepted completion is ever stored, so a rejected
                # one can never be parsed later by accident.
                llm_response = candidate
                model_used = model_name
                print(f"✓ Success with {model_name}")
                break

            last_error = f"{model_name} returned an empty or too-short response"
            print(f"✗ Failed with {model_name}: {last_error}")

        if not llm_response:
            raise HTTPException(
                status_code=500,
                detail=f"All models failed. Last error: {str(last_error)}\n\nPlease check:\n1. Token has correct permissions\n2. Token is valid and not expired\n3. Try regenerating token"
            )

        parsed_result = parse_llm_response(llm_response, max_tags)

        # Basic heuristic: the more fields were parsed, the higher the score.
        confidence = _confidence_score(parsed_result)

        generation_time = time.perf_counter() - started

        return EventTagsResponse(
            event_name=request.event_name,
            generated_tags=parsed_result["generated_tags"],
            primary_category=parsed_result["primary_category"],
            secondary_categories=parsed_result["secondary_categories"],
            keywords=parsed_result["keywords"],
            hashtags=parsed_result["hashtags"],
            target_audience=parsed_result["target_audience"],
            sentiment=parsed_result["sentiment"],
            confidence_score=round(confidence, 2),
            generation_time=f"{generation_time:.2f}s",
            model_used=model_used.split('/')[-1] if model_used else "unknown"
        )

    except HTTPException:
        # Already a well-formed API error; pass it through untouched.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error generating tags: {str(e)}"
        ) from e
377
+
378
+
379
@app.post("/generate-tags/batch")
async def generate_tags_batch(events: List[EventTagsRequest]):
    """
    Batch generate tags for multiple events

    Useful for bulk processing or migrating existing events
    """
    # NOTE: the docstring above is kept verbatim because FastAPI publishes it
    # as this endpoint's description in the OpenAPI schema.

    outcomes = []
    succeeded = 0

    # Events are processed one after another; a failure on one event is
    # recorded in its entry and does not abort the rest of the batch.
    for item in events:
        try:
            tags = await generate_tags(item)
        except Exception as exc:
            entry = {
                "event_name": item.event_name,
                "success": False,
                "error": str(exc),
            }
        else:
            entry = {
                "event_name": item.event_name,
                "success": True,
                "data": tags,
            }
            succeeded += 1
        outcomes.append(entry)

    return {
        "total": len(events),
        "successful": succeeded,
        "failed": len(outcomes) - succeeded,
        "results": outcomes,
    }
409
 
410
 
411