minh9972t12 commited on
Commit
22304be
·
verified ·
1 Parent(s): 47ee52f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -162
app.py CHANGED
@@ -9,6 +9,8 @@ from pydantic import BaseModel
9
  from typing import Optional, List
10
  from datetime import datetime
11
  import os
 
 
12
  from huggingface_hub import InferenceClient
13
  import uvicorn
14
 
@@ -16,7 +18,7 @@ import uvicorn
16
  app = FastAPI(
17
  title="Event Tags Generator API",
18
  description="AI-powered automatic tag generation for events using LLM",
19
- version="1.0.0"
20
  )
21
 
22
  # CORS middleware
@@ -67,7 +69,7 @@ async def root():
67
  return {
68
  "status": "running",
69
  "service": "Event Tags Generator API",
70
- "version": "1.0.0",
71
  "description": "Generate tags, keywords, categories automatically from event info",
72
  "endpoints": {
73
  "POST /generate-tags": {
@@ -80,28 +82,6 @@ async def root():
80
  "max_tags": "integer (optional, default: 10) - Số lượng tags tối đa",
81
  "language": "string (optional, default: 'vi') - Ngôn ngữ output",
82
  "hf_token": "string (optional) - Hugging Face token"
83
- },
84
- "response": {
85
- "generated_tags": "array - Danh sách tags",
86
- "primary_category": "string - Danh mục chính",
87
- "secondary_categories": "array - Danh mục phụ",
88
- "keywords": "array - Keywords SEO",
89
- "hashtags": "array - Social media hashtags",
90
- "target_audience": "array - Đối tượng mục tiêu",
91
- "sentiment": "string - Cảm xúc (positive/neutral/negative)",
92
- "confidence_score": "float - Độ tin cậy (0-1)"
93
- },
94
- "example": {
95
- "request": {
96
- "event_name": "Vietnam Music Festival 2025",
97
- "category": "Âm nhạc",
98
- "short_description": "Lễ hội âm nhạc quốc tế lớn nhất Việt Nam",
99
- "detailed_description": "Sự kiện quy tụ các nghệ sĩ nổi tiếng trong nước và quốc tế..."
100
- },
101
- "response": {
102
- "generated_tags": ["âm nhạc", "festival", "concert", "việt nam", "quốc tế"],
103
- "hashtags": ["#VietnamMusicFest", "#MusicFestival2025", "#LiveMusic"]
104
- }
105
  }
106
  }
107
  },
@@ -118,83 +98,50 @@ def build_powerful_prompt(
118
  language: str
119
  ) -> str:
120
  """
121
- Build a powerful, structured prompt for LLM to generate high-quality tags
122
  """
123
 
124
- lang_instruction = "in Vietnamese" if language == "vi" else "in English"
125
 
126
- prompt = f"""You are an expert AI system specialized in event marketing, SEO, and content categorization. Your task is to analyze event information and generate comprehensive, relevant tags and metadata.
127
-
128
- **EVENT INFORMATION:**
129
- Event Name: {event_name}
130
- • Primary Category: {category}
131
- Short Description: {short_desc}
132
- Detailed Description: {detailed_desc}
133
-
134
- **YOUR TASK:**
135
- Analyze the event information above and generate the following {lang_instruction}:
136
-
137
- 1. **TAGS** ({max_tags} tags maximum):
138
- - Generate specific, relevant, searchable tags
139
- - Include event type, theme, activities, location references
140
- - Mix broad and specific tags for better discoverability
141
- - Use lowercase, single words or short phrases
142
- - Example format: âm nhạc, festival, concert, outdoor, hà nội
143
-
144
- 2. **PRIMARY CATEGORY** (1 category):
145
- - The main category that best describes this event
146
- - Choose from: Âm nhạc, Thể thao, Công nghệ, Nghệ thuật, Ẩm thực, Giáo dục, Kinh doanh, Du lịch, Giải trí, Khác
147
-
148
- 3. **SECONDARY CATEGORIES** (2-3 categories):
149
- - Additional relevant categories
150
- - Help with cross-categorization
151
-
152
- 4. **KEYWORDS** (5-8 keywords):
153
- - SEO-optimized keywords for search engines
154
- - Include long-tail keywords
155
- - Example: "lễ hội âm nhạc hà nội", "concert quốc tế việt nam"
156
-
157
- 5. **HASHTAGS** (5-7 hashtags):
158
- - Social media friendly hashtags
159
- - Mix of popular and unique hashtags
160
- - Example: #VietnamMusicFest, #LiveMusic, #HanoiEvents
161
-
162
- 6. **TARGET AUDIENCE** (2-4 audience groups):
163
- - Who would be interested in this event?
164
- - Example: Giới trẻ, Gia đình, Dân văn phòng, Sinh viên
165
-
166
- 7. **SENTIMENT** (one word):
167
- - Overall emotion/feeling: positive, neutral, or negative
168
- - Based on event description tone
169
-
170
- **OUTPUT FORMAT (JSON-like structure):**
171
- TAGS: tag1, tag2, tag3, ...
172
- PRIMARY_CATEGORY: category_name
173
- SECONDARY_CATEGORIES: cat1, cat2, cat3
174
- KEYWORDS: keyword1, keyword2, keyword3, ...
175
- HASHTAGS: #tag1, #tag2, #tag3, ...
176
- TARGET_AUDIENCE: audience1, audience2, audience3
177
- SENTIMENT: positive/neutral/negative
178
-
179
- **IMPORTANT GUIDELINES:**
180
- - Be specific and relevant to the event
181
- - Use terms people would actually search for
182
- - Balance between popular and niche terms
183
- - Consider SEO and social media best practices
184
- - Keep tags concise and meaningful
185
- - Generate output {lang_instruction}
186
-
187
- Now, analyze the event and generate the metadata:"""
188
 
189
  return prompt
190
 
191
 
192
  def parse_llm_response(response_text: str, max_tags: int) -> dict:
193
  """
194
- Parse LLM response into structured format
195
- Handles various response formats robustly
196
  """
197
 
 
198
  result = {
199
  "generated_tags": [],
200
  "primary_category": "",
@@ -205,50 +152,109 @@ def parse_llm_response(response_text: str, max_tags: int) -> dict:
205
  "sentiment": "neutral"
206
  }
207
 
208
- lines = response_text.strip().split('\n')
 
 
 
 
 
209
 
210
- for line in lines:
211
- line = line.strip()
212
- if not line:
213
- continue
214
-
215
- # Parse TAGS
216
- if line.upper().startswith('TAGS:'):
217
- tags_text = line.split(':', 1)[1].strip()
218
- tags = [t.strip().lower() for t in tags_text.split(',') if t.strip()]
219
- result["generated_tags"] = tags[:max_tags]
220
-
221
- # Parse PRIMARY_CATEGORY
222
- elif line.upper().startswith('PRIMARY_CATEGORY:'):
223
- result["primary_category"] = line.split(':', 1)[1].strip()
224
-
225
- # Parse SECONDARY_CATEGORIES
226
- elif line.upper().startswith('SECONDARY_CATEGORIES:'):
227
- cats_text = line.split(':', 1)[1].strip()
228
- result["secondary_categories"] = [c.strip() for c in cats_text.split(',') if c.strip()]
229
 
230
- # Parse KEYWORDS
231
- elif line.upper().startswith('KEYWORDS:'):
232
- kw_text = line.split(':', 1)[1].strip()
233
- result["keywords"] = [k.strip() for k in kw_text.split(',') if k.strip()]
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
- # Parse HASHTAGS
236
- elif line.upper().startswith('HASHTAGS:'):
237
- ht_text = line.split(':', 1)[1].strip()
238
- hashtags = [h.strip() for h in ht_text.split(',') if h.strip()]
239
- # Ensure hashtags start with #
240
- result["hashtags"] = [h if h.startswith('#') else f"#{h}" for h in hashtags]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
- # Parse TARGET_AUDIENCE
243
- elif line.upper().startswith('TARGET_AUDIENCE:'):
244
- aud_text = line.split(':', 1)[1].strip()
245
- result["target_audience"] = [a.strip() for a in aud_text.split(',') if a.strip()]
246
 
247
- # Parse SENTIMENT
248
- elif line.upper().startswith('SENTIMENT:'):
249
- sentiment = line.split(':', 1)[1].strip().lower()
250
- if sentiment in ['positive', 'neutral', 'negative']:
251
- result["sentiment"] = sentiment
252
 
253
  return result
254
 
@@ -257,24 +263,6 @@ def parse_llm_response(response_text: str, max_tags: int) -> dict:
257
  async def generate_tags(request: EventTagsRequest):
258
  """
259
  Generate comprehensive tags and metadata for an event
260
-
261
- This endpoint uses advanced LLM prompting to generate:
262
- - Relevant tags for searchability
263
- - Category classification
264
- - SEO keywords
265
- - Social media hashtags
266
- - Target audience identification
267
- - Sentiment analysis
268
-
269
- **Input:**
270
- - event_name: Name of the event
271
- - category: Primary category (music, sports, tech, etc.)
272
- - short_description: Brief 1-2 sentence description
273
- - detailed_description: Full event description with details
274
-
275
- **Output:**
276
- - Structured metadata ready for use in event management system
277
- - All fields optimized for search and discovery
278
  """
279
 
280
  try:
@@ -304,11 +292,11 @@ async def generate_tags(request: EventTagsRequest):
304
 
305
  # Try multiple models for best results
306
  models_to_try = [
307
- "microsoft/Phi-3-mini-4k-instruct",
308
  "mistralai/Mistral-7B-Instruct-v0.3",
 
309
  "HuggingFaceH4/zephyr-7b-beta",
310
  "meta-llama/Llama-3.2-3B-Instruct",
311
- "meta-llama/Meta-Llama-3-8B-Instruct" # Thêm model backup
312
  ]
313
 
314
  llm_response = ""
@@ -319,8 +307,7 @@ async def generate_tags(request: EventTagsRequest):
319
  try:
320
  print(f"Trying model: {model_name}")
321
 
322
- # FIXED: Sử dụng chat_completion thay vì text_generation
323
- # Format messages cho chat completion API
324
  messages = [
325
  {
326
  "role": "user",
@@ -328,19 +315,19 @@ async def generate_tags(request: EventTagsRequest):
328
  }
329
  ]
330
 
331
- # Generate với chat_completion
332
  response = client.chat_completion(
333
  messages=messages,
334
  model=model_name,
335
- max_tokens=800,
336
- temperature=0.7,
337
  top_p=0.9
338
  )
339
 
340
- # Lấy nội dung response
341
  llm_response = response.choices[0].message.content
342
 
343
- if llm_response and len(llm_response.strip()) > 50:
344
  model_used = model_name
345
  print(f"✓ Success with {model_name}")
346
  break
@@ -351,16 +338,32 @@ async def generate_tags(request: EventTagsRequest):
351
  continue
352
 
353
  # Check if generation succeeded
354
- if not llm_response or len(llm_response.strip()) < 50:
355
  raise HTTPException(
356
  status_code=500,
357
- detail=f"All models failed. Last error: {str(last_error)}\n\nPlease check:\n1. Token has correct permissions\n2. Token is valid and not expired\n3. Try regenerating token"
358
  )
359
 
360
- # Parse LLM response into structured format
361
  parsed_result = parse_llm_response(llm_response, request.max_tags)
362
 
363
- # Calculate confidence score (basic heuristic)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  confidence = 0.0
365
  if parsed_result["generated_tags"]:
366
  confidence += 0.3
@@ -404,8 +407,6 @@ async def generate_tags(request: EventTagsRequest):
404
  async def generate_tags_batch(events: List[EventTagsRequest]):
405
  """
406
  Batch generate tags for multiple events
407
-
408
- Useful for bulk processing or migrating existing events
409
  """
410
  results = []
411
 
 
9
  from typing import Optional, List
10
  from datetime import datetime
11
  import os
12
+ import json
13
+ import re
14
  from huggingface_hub import InferenceClient
15
  import uvicorn
16
 
 
18
  app = FastAPI(
19
  title="Event Tags Generator API",
20
  description="AI-powered automatic tag generation for events using LLM",
21
+ version="1.0.1"
22
  )
23
 
24
  # CORS middleware
 
69
  return {
70
  "status": "running",
71
  "service": "Event Tags Generator API",
72
+ "version": "1.0.1",
73
  "description": "Generate tags, keywords, categories automatically from event info",
74
  "endpoints": {
75
  "POST /generate-tags": {
 
82
  "max_tags": "integer (optional, default: 10) - Số lượng tags tối đa",
83
  "language": "string (optional, default: 'vi') - Ngôn ngữ output",
84
  "hf_token": "string (optional) - Hugging Face token"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  }
86
  }
87
  },
 
98
  language: str
99
  ) -> str:
100
  """
101
+ Build a concise, JSON-focused prompt for better parsing
102
  """
103
 
104
+ lang_instruction = "tiếng Việt" if language == "vi" else "English"
105
 
106
+ # Shorter, more focused prompt that demands JSON output
107
+ prompt = f"""Phân tích sự kiện và tạo metadata theo format JSON bên dưới.
108
+
109
+ SỰ KIỆN:
110
+ Tên: {event_name}
111
+ Danh mục: {category}
112
+ Mô tả ngắn: {short_desc}
113
+ Mô tả chi tiết: {detailed_desc}
114
+
115
+ YÊU CẦU: Tạo output dưới dạng JSON với các trường sau (sử dụng {lang_instruction}):
116
+
117
+ {{
118
+ "tags": ["tag1", "tag2", "tag3", ...],
119
+ "primary_category": "danh mục chính",
120
+ "secondary_categories": ["danh mục phụ 1", "danh mục phụ 2"],
121
+ "keywords": ["keyword1", "keyword2", ...],
122
+ "hashtags": ["#hashtag1", "#hashtag2", ...],
123
+ "target_audience": ["đối tượng 1", "đối tượng 2"],
124
+ "sentiment": "positive/neutral/negative"
125
+ }}
126
+
127
+ CHÚ Ý:
128
+ - Tạo tối đa {max_tags} tags
129
+ - Tags phải lowercase, ngắn gọn, dễ tìm kiếm
130
+ - Hashtags bắt đầu bằng #
131
+ - Primary_category chọn từ: Âm nhạc, Thể thao, Công nghệ, Nghệ thuật, Ẩm thực, Giáo dục, Kinh doanh, Du lịch, Giải trí
132
+ - Chỉ trả về JSON, không thêm text khác
133
+
134
+ JSON OUTPUT:"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
  return prompt
137
 
138
 
139
  def parse_llm_response(response_text: str, max_tags: int) -> dict:
140
  """
141
+ Parse LLM response - handles both JSON and text formats
 
142
  """
143
 
144
+ # Default result
145
  result = {
146
  "generated_tags": [],
147
  "primary_category": "",
 
152
  "sentiment": "neutral"
153
  }
154
 
155
+ # Debug: Print raw response
156
+ print(f"\n{'='*60}")
157
+ print(f"RAW RESPONSE FROM MODEL:")
158
+ print(f"{'='*60}")
159
+ print(response_text[:500]) # Print first 500 chars
160
+ print(f"{'='*60}\n")
161
 
162
+ # Try to extract JSON from response
163
+ try:
164
+ # Method 1: Try direct JSON parse
165
+ try:
166
+ data = json.loads(response_text)
167
+ if isinstance(data, dict):
168
+ result["generated_tags"] = data.get("tags", [])[:max_tags]
169
+ result["primary_category"] = data.get("primary_category", "")
170
+ result["secondary_categories"] = data.get("secondary_categories", [])
171
+ result["keywords"] = data.get("keywords", [])
172
+ result["hashtags"] = data.get("hashtags", [])
173
+ result["target_audience"] = data.get("target_audience", [])
174
+ result["sentiment"] = data.get("sentiment", "neutral")
175
+ print("✓ Parsed using direct JSON")
176
+ return result
177
+ except json.JSONDecodeError:
178
+ pass
 
 
179
 
180
+ # Method 2: Extract JSON from text using regex
181
+ json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', response_text, re.DOTALL)
182
+ if json_match:
183
+ try:
184
+ json_str = json_match.group(0)
185
+ data = json.loads(json_str)
186
+ result["generated_tags"] = data.get("tags", [])[:max_tags]
187
+ result["primary_category"] = data.get("primary_category", "")
188
+ result["secondary_categories"] = data.get("secondary_categories", [])
189
+ result["keywords"] = data.get("keywords", [])
190
+ result["hashtags"] = data.get("hashtags", [])
191
+ result["target_audience"] = data.get("target_audience", [])
192
+ result["sentiment"] = data.get("sentiment", "neutral")
193
+ print("✓ Parsed using regex JSON extraction")
194
+ return result
195
+ except:
196
+ pass
197
 
198
+ # Method 3: Parse line by line (fallback)
199
+ lines = response_text.strip().split('\n')
200
+ for line in lines:
201
+ line = line.strip()
202
+ if not line:
203
+ continue
204
+
205
+ # Parse TAGS
206
+ if 'tags' in line.lower() and ':' in line:
207
+ # Extract array content
208
+ match = re.search(r'\[(.*?)\]', line)
209
+ if match:
210
+ tags_str = match.group(1)
211
+ tags = [t.strip().strip('"\'').lower() for t in tags_str.split(',') if t.strip()]
212
+ result["generated_tags"] = tags[:max_tags]
213
+
214
+ # Parse PRIMARY_CATEGORY
215
+ elif 'primary_category' in line.lower() and ':' in line:
216
+ value = line.split(':', 1)[1].strip().strip(',"\'')
217
+ result["primary_category"] = value
218
+
219
+ # Parse SECONDARY_CATEGORIES
220
+ elif 'secondary_categories' in line.lower() and ':' in line:
221
+ match = re.search(r'\[(.*?)\]', line)
222
+ if match:
223
+ cats_str = match.group(1)
224
+ result["secondary_categories"] = [c.strip().strip('"\'') for c in cats_str.split(',') if c.strip()]
225
+
226
+ # Parse KEYWORDS
227
+ elif 'keywords' in line.lower() and ':' in line:
228
+ match = re.search(r'\[(.*?)\]', line)
229
+ if match:
230
+ kw_str = match.group(1)
231
+ result["keywords"] = [k.strip().strip('"\'') for k in kw_str.split(',') if k.strip()]
232
+
233
+ # Parse HASHTAGS
234
+ elif 'hashtags' in line.lower() and ':' in line:
235
+ match = re.search(r'\[(.*?)\]', line)
236
+ if match:
237
+ ht_str = match.group(1)
238
+ hashtags = [h.strip().strip('"\'') for h in ht_str.split(',') if h.strip()]
239
+ result["hashtags"] = [h if h.startswith('#') else f"#{h}" for h in hashtags]
240
+
241
+ # Parse TARGET_AUDIENCE
242
+ elif 'target_audience' in line.lower() and ':' in line:
243
+ match = re.search(r'\[(.*?)\]', line)
244
+ if match:
245
+ aud_str = match.group(1)
246
+ result["target_audience"] = [a.strip().strip('"\'') for a in aud_str.split(',') if a.strip()]
247
+
248
+ # Parse SENTIMENT
249
+ elif 'sentiment' in line.lower() and ':' in line:
250
+ sentiment = line.split(':', 1)[1].strip().strip(',"\'').lower()
251
+ if sentiment in ['positive', 'neutral', 'negative']:
252
+ result["sentiment"] = sentiment
253
 
254
+ print("✓ Parsed using line-by-line fallback")
 
 
 
255
 
256
+ except Exception as e:
257
+ print(f"✗ Parsing error: {str(e)}")
 
 
 
258
 
259
  return result
260
 
 
263
  async def generate_tags(request: EventTagsRequest):
264
  """
265
  Generate comprehensive tags and metadata for an event
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  """
267
 
268
  try:
 
292
 
293
  # Try multiple models for best results
294
  models_to_try = [
 
295
  "mistralai/Mistral-7B-Instruct-v0.3",
296
+ "microsoft/Phi-3-mini-4k-instruct",
297
  "HuggingFaceH4/zephyr-7b-beta",
298
  "meta-llama/Llama-3.2-3B-Instruct",
299
+ "meta-llama/Meta-Llama-3-8B-Instruct"
300
  ]
301
 
302
  llm_response = ""
 
307
  try:
308
  print(f"Trying model: {model_name}")
309
 
310
+ # Format messages
 
311
  messages = [
312
  {
313
  "role": "user",
 
315
  }
316
  ]
317
 
318
+ # Generate with chat_completion
319
  response = client.chat_completion(
320
  messages=messages,
321
  model=model_name,
322
+ max_tokens=1000, # Increased for more content
323
+ temperature=0.3, # Lower temperature for more consistent output
324
  top_p=0.9
325
  )
326
 
327
+ # Get response content
328
  llm_response = response.choices[0].message.content
329
 
330
+ if llm_response and len(llm_response.strip()) > 20:
331
  model_used = model_name
332
  print(f"✓ Success with {model_name}")
333
  break
 
338
  continue
339
 
340
  # Check if generation succeeded
341
+ if not llm_response or len(llm_response.strip()) < 20:
342
  raise HTTPException(
343
  status_code=500,
344
+ detail=f"All models failed. Last error: {str(last_error)}"
345
  )
346
 
347
+ # Parse LLM response
348
  parsed_result = parse_llm_response(llm_response, request.max_tags)
349
 
350
+ # If parsing failed, create basic fallback tags
351
+ if not parsed_result["generated_tags"]:
352
+ print("⚠ Warning: No tags parsed, creating fallback tags")
353
+ # Create basic tags from event info
354
+ fallback_tags = []
355
+ # Add category as tag
356
+ if request.category:
357
+ fallback_tags.append(request.category.lower())
358
+ # Extract words from event name
359
+ name_words = [w.lower() for w in request.event_name.split() if len(w) > 3]
360
+ fallback_tags.extend(name_words[:3])
361
+
362
+ parsed_result["generated_tags"] = fallback_tags[:request.max_tags]
363
+ parsed_result["primary_category"] = request.category
364
+ parsed_result["sentiment"] = "positive"
365
+
366
+ # Calculate confidence score
367
  confidence = 0.0
368
  if parsed_result["generated_tags"]:
369
  confidence += 0.3
 
407
  async def generate_tags_batch(events: List[EventTagsRequest]):
408
  """
409
  Batch generate tags for multiple events
 
 
410
  """
411
  results = []
412