minh9972t12 commited on
Commit
22304be
·
verified ·
1 Parent(s): 47ee52f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -162
app.py CHANGED
@@ -9,6 +9,8 @@ from pydantic import BaseModel
9
  from typing import Optional, List
10
  from datetime import datetime
11
  import os
 
 
12
  from huggingface_hub import InferenceClient
13
  import uvicorn
14
 
@@ -16,7 +18,7 @@ import uvicorn
16
  app = FastAPI(
17
  title="Event Tags Generator API",
18
  description="AI-powered automatic tag generation for events using LLM",
19
- version="1.0.0"
20
  )
21
 
22
  # CORS middleware
@@ -67,7 +69,7 @@ async def root():
67
  return {
68
  "status": "running",
69
  "service": "Event Tags Generator API",
70
- "version": "1.0.0",
71
  "description": "Generate tags, keywords, categories automatically from event info",
72
  "endpoints": {
73
  "POST /generate-tags": {
@@ -80,28 +82,6 @@ async def root():
80
  "max_tags": "integer (optional, default: 10) - Số lượng tags tối đa",
81
  "language": "string (optional, default: 'vi') - Ngôn ngữ output",
82
  "hf_token": "string (optional) - Hugging Face token"
83
- },
84
- "response": {
85
- "generated_tags": "array - Danh sách tags",
86
- "primary_category": "string - Danh mục chính",
87
- "secondary_categories": "array - Danh mục phụ",
88
- "keywords": "array - Keywords SEO",
89
- "hashtags": "array - Social media hashtags",
90
- "target_audience": "array - Đối tượng mục tiêu",
91
- "sentiment": "string - Cảm xúc (positive/neutral/negative)",
92
- "confidence_score": "float - Độ tin cậy (0-1)"
93
- },
94
- "example": {
95
- "request": {
96
- "event_name": "Vietnam Music Festival 2025",
97
- "category": "Âm nhạc",
98
- "short_description": "Lễ hội âm nhạc quốc tế lớn nhất Việt Nam",
99
- "detailed_description": "Sự kiện quy tụ các nghệ sĩ nổi tiếng trong nước và quốc tế..."
100
- },
101
- "response": {
102
- "generated_tags": ["âm nhạc", "festival", "concert", "việt nam", "quốc tế"],
103
- "hashtags": ["#VietnamMusicFest", "#MusicFestival2025", "#LiveMusic"]
104
- }
105
  }
106
  }
107
  },
@@ -118,83 +98,50 @@ def build_powerful_prompt(
118
  language: str
119
  ) -> str:
120
  """
121
- Build a powerful, structured prompt for LLM to generate high-quality tags
122
  """
123
 
124
- lang_instruction = "in Vietnamese" if language == "vi" else "in English"
125
 
126
- prompt = f"""You are an expert AI system specialized in event marketing, SEO, and content categorization. Your task is to analyze event information and generate comprehensive, relevant tags and metadata.
127
-
128
- **EVENT INFORMATION:**
129
- Event Name: {event_name}
130
- • Primary Category: {category}
131
- Short Description: {short_desc}
132
- Detailed Description: {detailed_desc}
133
-
134
- **YOUR TASK:**
135
- Analyze the event information above and generate the following {lang_instruction}:
136
-
137
- 1. **TAGS** ({max_tags} tags maximum):
138
- - Generate specific, relevant, searchable tags
139
- - Include event type, theme, activities, location references
140
- - Mix broad and specific tags for better discoverability
141
- - Use lowercase, single words or short phrases
142
- - Example format: âm nhạc, festival, concert, outdoor, hà nội
143
-
144
- 2. **PRIMARY CATEGORY** (1 category):
145
- - The main category that best describes this event
146
- - Choose from: Âm nhạc, Thể thao, Công nghệ, Nghệ thuật, Ẩm thực, Giáo dục, Kinh doanh, Du lịch, Giải trí, Khác
147
-
148
- 3. **SECONDARY CATEGORIES** (2-3 categories):
149
- - Additional relevant categories
150
- - Help with cross-categorization
151
-
152
- 4. **KEYWORDS** (5-8 keywords):
153
- - SEO-optimized keywords for search engines
154
- - Include long-tail keywords
155
- - Example: "lễ hội âm nhạc hà nội", "concert quốc tế việt nam"
156
-
157
- 5. **HASHTAGS** (5-7 hashtags):
158
- - Social media friendly hashtags
159
- - Mix of popular and unique hashtags
160
- - Example: #VietnamMusicFest, #LiveMusic, #HanoiEvents
161
-
162
- 6. **TARGET AUDIENCE** (2-4 audience groups):
163
- - Who would be interested in this event?
164
- - Example: Giới trẻ, Gia đình, Dân văn phòng, Sinh viên
165
-
166
- 7. **SENTIMENT** (one word):
167
- - Overall emotion/feeling: positive, neutral, or negative
168
- - Based on event description tone
169
-
170
- **OUTPUT FORMAT (JSON-like structure):**
171
- TAGS: tag1, tag2, tag3, ...
172
- PRIMARY_CATEGORY: category_name
173
- SECONDARY_CATEGORIES: cat1, cat2, cat3
174
- KEYWORDS: keyword1, keyword2, keyword3, ...
175
- HASHTAGS: #tag1, #tag2, #tag3, ...
176
- TARGET_AUDIENCE: audience1, audience2, audience3
177
- SENTIMENT: positive/neutral/negative
178
-
179
- **IMPORTANT GUIDELINES:**
180
- - Be specific and relevant to the event
181
- - Use terms people would actually search for
182
- - Balance between popular and niche terms
183
- - Consider SEO and social media best practices
184
- - Keep tags concise and meaningful
185
- - Generate output {lang_instruction}
186
-
187
- Now, analyze the event and generate the metadata:"""
188
 
189
  return prompt
190
 
191
 
192
  def parse_llm_response(response_text: str, max_tags: int) -> dict:
193
  """
194
- Parse LLM response into structured format
195
- Handles various response formats robustly
196
  """
197
 
 
198
  result = {
199
  "generated_tags": [],
200
  "primary_category": "",
@@ -205,50 +152,109 @@ def parse_llm_response(response_text: str, max_tags: int) -> dict:
205
  "sentiment": "neutral"
206
  }
207
 
208
- lines = response_text.strip().split('\n')
 
 
 
 
 
209
 
210
- for line in lines:
211
- line = line.strip()
212
- if not line:
213
- continue
214
-
215
- # Parse TAGS
216
- if line.upper().startswith('TAGS:'):
217
- tags_text = line.split(':', 1)[1].strip()
218
- tags = [t.strip().lower() for t in tags_text.split(',') if t.strip()]
219
- result["generated_tags"] = tags[:max_tags]
220
-
221
- # Parse PRIMARY_CATEGORY
222
- elif line.upper().startswith('PRIMARY_CATEGORY:'):
223
- result["primary_category"] = line.split(':', 1)[1].strip()
224
-
225
- # Parse SECONDARY_CATEGORIES
226
- elif line.upper().startswith('SECONDARY_CATEGORIES:'):
227
- cats_text = line.split(':', 1)[1].strip()
228
- result["secondary_categories"] = [c.strip() for c in cats_text.split(',') if c.strip()]
229
 
230
- # Parse KEYWORDS
231
- elif line.upper().startswith('KEYWORDS:'):
232
- kw_text = line.split(':', 1)[1].strip()
233
- result["keywords"] = [k.strip() for k in kw_text.split(',') if k.strip()]
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
- # Parse HASHTAGS
236
- elif line.upper().startswith('HASHTAGS:'):
237
- ht_text = line.split(':', 1)[1].strip()
238
- hashtags = [h.strip() for h in ht_text.split(',') if h.strip()]
239
- # Ensure hashtags start with #
240
- result["hashtags"] = [h if h.startswith('#') else f"#{h}" for h in hashtags]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
- # Parse TARGET_AUDIENCE
243
- elif line.upper().startswith('TARGET_AUDIENCE:'):
244
- aud_text = line.split(':', 1)[1].strip()
245
- result["target_audience"] = [a.strip() for a in aud_text.split(',') if a.strip()]
246
 
247
- # Parse SENTIMENT
248
- elif line.upper().startswith('SENTIMENT:'):
249
- sentiment = line.split(':', 1)[1].strip().lower()
250
- if sentiment in ['positive', 'neutral', 'negative']:
251
- result["sentiment"] = sentiment
252
 
253
  return result
254
 
@@ -257,24 +263,6 @@ def parse_llm_response(response_text: str, max_tags: int) -> dict:
257
  async def generate_tags(request: EventTagsRequest):
258
  """
259
  Generate comprehensive tags and metadata for an event
260
-
261
- This endpoint uses advanced LLM prompting to generate:
262
- - Relevant tags for searchability
263
- - Category classification
264
- - SEO keywords
265
- - Social media hashtags
266
- - Target audience identification
267
- - Sentiment analysis
268
-
269
- **Input:**
270
- - event_name: Name of the event
271
- - category: Primary category (music, sports, tech, etc.)
272
- - short_description: Brief 1-2 sentence description
273
- - detailed_description: Full event description with details
274
-
275
- **Output:**
276
- - Structured metadata ready for use in event management system
277
- - All fields optimized for search and discovery
278
  """
279
 
280
  try:
@@ -304,11 +292,11 @@ async def generate_tags(request: EventTagsRequest):
304
 
305
  # Try multiple models for best results
306
  models_to_try = [
307
- "microsoft/Phi-3-mini-4k-instruct",
308
  "mistralai/Mistral-7B-Instruct-v0.3",
 
309
  "HuggingFaceH4/zephyr-7b-beta",
310
  "meta-llama/Llama-3.2-3B-Instruct",
311
- "meta-llama/Meta-Llama-3-8B-Instruct" # Thêm model backup
312
  ]
313
 
314
  llm_response = ""
@@ -319,8 +307,7 @@ async def generate_tags(request: EventTagsRequest):
319
  try:
320
  print(f"Trying model: {model_name}")
321
 
322
- # FIXED: Sử dụng chat_completion thay vì text_generation
323
- # Format messages cho chat completion API
324
  messages = [
325
  {
326
  "role": "user",
@@ -328,19 +315,19 @@ async def generate_tags(request: EventTagsRequest):
328
  }
329
  ]
330
 
331
- # Generate với chat_completion
332
  response = client.chat_completion(
333
  messages=messages,
334
  model=model_name,
335
- max_tokens=800,
336
- temperature=0.7,
337
  top_p=0.9
338
  )
339
 
340
- # Lấy nội dung response
341
  llm_response = response.choices[0].message.content
342
 
343
- if llm_response and len(llm_response.strip()) > 50:
344
  model_used = model_name
345
  print(f"✓ Success with {model_name}")
346
  break
@@ -351,16 +338,32 @@ async def generate_tags(request: EventTagsRequest):
351
  continue
352
 
353
  # Check if generation succeeded
354
- if not llm_response or len(llm_response.strip()) < 50:
355
  raise HTTPException(
356
  status_code=500,
357
- detail=f"All models failed. Last error: {str(last_error)}\n\nPlease check:\n1. Token has correct permissions\n2. Token is valid and not expired\n3. Try regenerating token"
358
  )
359
 
360
- # Parse LLM response into structured format
361
  parsed_result = parse_llm_response(llm_response, request.max_tags)
362
 
363
- # Calculate confidence score (basic heuristic)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  confidence = 0.0
365
  if parsed_result["generated_tags"]:
366
  confidence += 0.3
@@ -404,8 +407,6 @@ async def generate_tags(request: EventTagsRequest):
404
  async def generate_tags_batch(events: List[EventTagsRequest]):
405
  """
406
  Batch generate tags for multiple events
407
-
408
- Useful for bulk processing or migrating existing events
409
  """
410
  results = []
411
 
 
9
  from typing import Optional, List
10
  from datetime import datetime
11
  import os
12
+ import json
13
+ import re
14
  from huggingface_hub import InferenceClient
15
  import uvicorn
16
 
 
18
  app = FastAPI(
19
  title="Event Tags Generator API",
20
  description="AI-powered automatic tag generation for events using LLM",
21
+ version="1.0.1"
22
  )
23
 
24
  # CORS middleware
 
69
  return {
70
  "status": "running",
71
  "service": "Event Tags Generator API",
72
+ "version": "1.0.1",
73
  "description": "Generate tags, keywords, categories automatically from event info",
74
  "endpoints": {
75
  "POST /generate-tags": {
 
82
  "max_tags": "integer (optional, default: 10) - Số lượng tags tối đa",
83
  "language": "string (optional, default: 'vi') - Ngôn ngữ output",
84
  "hf_token": "string (optional) - Hugging Face token"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  }
86
  }
87
  },
 
98
  language: str
99
  ) -> str:
100
  """
101
+ Build a concise, JSON-focused prompt for better parsing
102
  """
103
 
104
+ lang_instruction = "tiếng Việt" if language == "vi" else "English"
105
 
106
+ # Shorter, more focused prompt that demands JSON output
107
+ prompt = f"""Phân tích sự kiện và tạo metadata theo format JSON bên dưới.
108
+
109
+ SỰ KIỆN:
110
+ Tên: {event_name}
111
+ Danh mục: {category}
112
+ Mô tả ngắn: {short_desc}
113
+ Mô tả chi tiết: {detailed_desc}
114
+
115
+ YÊU CẦU: Tạo output dưới dạng JSON với các trường sau (sử dụng {lang_instruction}):
116
+
117
+ {{
118
+ "tags": ["tag1", "tag2", "tag3", ...],
119
+ "primary_category": "danh mục chính",
120
+ "secondary_categories": ["danh mục phụ 1", "danh mục phụ 2"],
121
+ "keywords": ["keyword1", "keyword2", ...],
122
+ "hashtags": ["#hashtag1", "#hashtag2", ...],
123
+ "target_audience": ["đối tượng 1", "đối tượng 2"],
124
+ "sentiment": "positive/neutral/negative"
125
+ }}
126
+
127
+ CHÚ Ý:
128
+ - Tạo tối đa {max_tags} tags
129
+ - Tags phải lowercase, ngắn gọn, dễ tìm kiếm
130
+ - Hashtags bắt đầu bằng #
131
+ - Primary_category chọn từ: Âm nhạc, Thể thao, Công nghệ, Nghệ thuật, Ẩm thực, Giáo dục, Kinh doanh, Du lịch, Giải trí
132
+ - Chỉ trả về JSON, không thêm text khác
133
+
134
+ JSON OUTPUT:"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
  return prompt
137
 
138
 
139
  def parse_llm_response(response_text: str, max_tags: int) -> dict:
140
  """
141
+ Parse LLM response - handles both JSON and text formats
 
142
  """
143
 
144
+ # Default result
145
  result = {
146
  "generated_tags": [],
147
  "primary_category": "",
 
152
  "sentiment": "neutral"
153
  }
154
 
155
+ # Debug: Print raw response
156
+ print(f"\n{'='*60}")
157
+ print(f"RAW RESPONSE FROM MODEL:")
158
+ print(f"{'='*60}")
159
+ print(response_text[:500]) # Print first 500 chars
160
+ print(f"{'='*60}\n")
161
 
162
+ # Try to extract JSON from response
163
+ try:
164
+ # Method 1: Try direct JSON parse
165
+ try:
166
+ data = json.loads(response_text)
167
+ if isinstance(data, dict):
168
+ result["generated_tags"] = data.get("tags", [])[:max_tags]
169
+ result["primary_category"] = data.get("primary_category", "")
170
+ result["secondary_categories"] = data.get("secondary_categories", [])
171
+ result["keywords"] = data.get("keywords", [])
172
+ result["hashtags"] = data.get("hashtags", [])
173
+ result["target_audience"] = data.get("target_audience", [])
174
+ result["sentiment"] = data.get("sentiment", "neutral")
175
+ print("✓ Parsed using direct JSON")
176
+ return result
177
+ except json.JSONDecodeError:
178
+ pass
 
 
179
 
180
+ # Method 2: Extract JSON from text using regex
181
+ json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', response_text, re.DOTALL)
182
+ if json_match:
183
+ try:
184
+ json_str = json_match.group(0)
185
+ data = json.loads(json_str)
186
+ result["generated_tags"] = data.get("tags", [])[:max_tags]
187
+ result["primary_category"] = data.get("primary_category", "")
188
+ result["secondary_categories"] = data.get("secondary_categories", [])
189
+ result["keywords"] = data.get("keywords", [])
190
+ result["hashtags"] = data.get("hashtags", [])
191
+ result["target_audience"] = data.get("target_audience", [])
192
+ result["sentiment"] = data.get("sentiment", "neutral")
193
+ print("✓ Parsed using regex JSON extraction")
194
+ return result
195
+ except:
196
+ pass
197
 
198
+ # Method 3: Parse line by line (fallback)
199
+ lines = response_text.strip().split('\n')
200
+ for line in lines:
201
+ line = line.strip()
202
+ if not line:
203
+ continue
204
+
205
+ # Parse TAGS
206
+ if 'tags' in line.lower() and ':' in line:
207
+ # Extract array content
208
+ match = re.search(r'\[(.*?)\]', line)
209
+ if match:
210
+ tags_str = match.group(1)
211
+ tags = [t.strip().strip('"\'').lower() for t in tags_str.split(',') if t.strip()]
212
+ result["generated_tags"] = tags[:max_tags]
213
+
214
+ # Parse PRIMARY_CATEGORY
215
+ elif 'primary_category' in line.lower() and ':' in line:
216
+ value = line.split(':', 1)[1].strip().strip(',"\'')
217
+ result["primary_category"] = value
218
+
219
+ # Parse SECONDARY_CATEGORIES
220
+ elif 'secondary_categories' in line.lower() and ':' in line:
221
+ match = re.search(r'\[(.*?)\]', line)
222
+ if match:
223
+ cats_str = match.group(1)
224
+ result["secondary_categories"] = [c.strip().strip('"\'') for c in cats_str.split(',') if c.strip()]
225
+
226
+ # Parse KEYWORDS
227
+ elif 'keywords' in line.lower() and ':' in line:
228
+ match = re.search(r'\[(.*?)\]', line)
229
+ if match:
230
+ kw_str = match.group(1)
231
+ result["keywords"] = [k.strip().strip('"\'') for k in kw_str.split(',') if k.strip()]
232
+
233
+ # Parse HASHTAGS
234
+ elif 'hashtags' in line.lower() and ':' in line:
235
+ match = re.search(r'\[(.*?)\]', line)
236
+ if match:
237
+ ht_str = match.group(1)
238
+ hashtags = [h.strip().strip('"\'') for h in ht_str.split(',') if h.strip()]
239
+ result["hashtags"] = [h if h.startswith('#') else f"#{h}" for h in hashtags]
240
+
241
+ # Parse TARGET_AUDIENCE
242
+ elif 'target_audience' in line.lower() and ':' in line:
243
+ match = re.search(r'\[(.*?)\]', line)
244
+ if match:
245
+ aud_str = match.group(1)
246
+ result["target_audience"] = [a.strip().strip('"\'') for a in aud_str.split(',') if a.strip()]
247
+
248
+ # Parse SENTIMENT
249
+ elif 'sentiment' in line.lower() and ':' in line:
250
+ sentiment = line.split(':', 1)[1].strip().strip(',"\'').lower()
251
+ if sentiment in ['positive', 'neutral', 'negative']:
252
+ result["sentiment"] = sentiment
253
 
254
+ print("✓ Parsed using line-by-line fallback")
 
 
 
255
 
256
+ except Exception as e:
257
+ print(f"✗ Parsing error: {str(e)}")
 
 
 
258
 
259
  return result
260
 
 
263
  async def generate_tags(request: EventTagsRequest):
264
  """
265
  Generate comprehensive tags and metadata for an event
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  """
267
 
268
  try:
 
292
 
293
  # Try multiple models for best results
294
  models_to_try = [
 
295
  "mistralai/Mistral-7B-Instruct-v0.3",
296
+ "microsoft/Phi-3-mini-4k-instruct",
297
  "HuggingFaceH4/zephyr-7b-beta",
298
  "meta-llama/Llama-3.2-3B-Instruct",
299
+ "meta-llama/Meta-Llama-3-8B-Instruct"
300
  ]
301
 
302
  llm_response = ""
 
307
  try:
308
  print(f"Trying model: {model_name}")
309
 
310
+ # Format messages
 
311
  messages = [
312
  {
313
  "role": "user",
 
315
  }
316
  ]
317
 
318
+ # Generate with chat_completion
319
  response = client.chat_completion(
320
  messages=messages,
321
  model=model_name,
322
+ max_tokens=1000, # Increased for more content
323
+ temperature=0.3, # Lower temperature for more consistent output
324
  top_p=0.9
325
  )
326
 
327
+ # Get response content
328
  llm_response = response.choices[0].message.content
329
 
330
+ if llm_response and len(llm_response.strip()) > 20:
331
  model_used = model_name
332
  print(f"✓ Success with {model_name}")
333
  break
 
338
  continue
339
 
340
  # Check if generation succeeded
341
+ if not llm_response or len(llm_response.strip()) < 20:
342
  raise HTTPException(
343
  status_code=500,
344
+ detail=f"All models failed. Last error: {str(last_error)}"
345
  )
346
 
347
+ # Parse LLM response
348
  parsed_result = parse_llm_response(llm_response, request.max_tags)
349
 
350
+ # If parsing failed, create basic fallback tags
351
+ if not parsed_result["generated_tags"]:
352
+ print("⚠ Warning: No tags parsed, creating fallback tags")
353
+ # Create basic tags from event info
354
+ fallback_tags = []
355
+ # Add category as tag
356
+ if request.category:
357
+ fallback_tags.append(request.category.lower())
358
+ # Extract words from event name
359
+ name_words = [w.lower() for w in request.event_name.split() if len(w) > 3]
360
+ fallback_tags.extend(name_words[:3])
361
+
362
+ parsed_result["generated_tags"] = fallback_tags[:request.max_tags]
363
+ parsed_result["primary_category"] = request.category
364
+ parsed_result["sentiment"] = "positive"
365
+
366
+ # Calculate confidence score
367
  confidence = 0.0
368
  if parsed_result["generated_tags"]:
369
  confidence += 0.3
 
407
  async def generate_tags_batch(events: List[EventTagsRequest]):
408
  """
409
  Batch generate tags for multiple events
 
 
410
  """
411
  results = []
412