Spaces:

minh9972t12
/

chatbot_test

Running

App Files Files Community

minh9972t12 committed on Oct 18

Commit

92405b2

verified ·

1 Parent(s): fdc8d37

Create event_tags_generator.py

Browse files

Files changed (1) hide show

event_tags_generator.py +430 -0

event_tags_generator.py ADDED Viewed

	@@ -0,0 +1,430 @@

+"""
+Event Tags Generator - AI Chatbot for automatic tag generation
+Generates relevant tags, keywords, and categories from event information
+"""
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from typing import Optional, List
+from datetime import datetime
+import os
+from huggingface_hub import InferenceClient
+# Initialize FastAPI
+# The title, description and version below are passed to the FastAPI
+# constructor as application metadata.
+app = FastAPI(
+    title="Event Tags Generator API",
+    description="AI-powered automatic tag generation for events using LLM",
+    version="1.0.0"
+)
+# CORS middleware
+# Every origin, method and header is allowed, with credentials enabled.
+# NOTE(review): FastAPI's CORS documentation says wildcard values should not
+# be combined with allow_credentials=True — confirm whether credentialed
+# cross-origin requests are actually needed by the clients of this API.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Default Hugging Face token, read once at import time from the environment.
+hf_token = os.environ.get("HUGGINGFACE_TOKEN")
+# Report at startup whether a default token is available.
+print(
+    "✓ Hugging Face token configured"
+    if hf_token
+    else "⚠ Warning: No HUGGINGFACE_TOKEN found. Set it in environment variable."
+)
+# Pydantic models
+# Request body for tag generation: the event details plus optional settings.
+class EventTagsRequest(BaseModel):
+    event_name: str
+    category: str
+    short_description: str
+    detailed_description: str
+    # Upper bound on the number of tags kept from the model output.
+    max_tags: Optional[int] = 10
+    language: Optional[str] = "vi"  # vi = Vietnamese, en = English
+    # Per-request Hugging Face token; the module-level token read from
+    # HUGGINGFACE_TOKEN is used when this is empty.
+    hf_token: Optional[str] = None
+# Response body returned by POST /generate-tags.
+class EventTagsResponse(BaseModel):
+    event_name: str  # echoed from the request
+    generated_tags: List[str]
+    primary_category: str
+    secondary_categories: List[str]
+    keywords: List[str]
+    hashtags: List[str]  # each entry starts with "#"
+    target_audience: List[str]
+    sentiment: str  # "positive", "neutral" or "negative"; the parser defaults to "neutral"
+    confidence_score: float  # heuristic sum of per-field weights, rounded to 2 decimals
+    generation_time: str  # elapsed seconds formatted like "1.23s"
+    model_used: str  # model id without its organisation prefix, or "unknown"
+@app.get("/")
+async def root():
+    """API Information"""
+    # The docstring above is left untouched because FastAPI publishes it.
+    # Field-by-field description of the JSON body accepted by POST /generate-tags.
+    request_fields = {
+        "event_name": "string - Tên sự kiện",
+        "category": "string - Danh mục (âm nhạc, thể thao, công nghệ...)",
+        "short_description": "string - Mô tả ngắn (1-2 câu)",
+        "detailed_description": "string - Mô tả chi tiết",
+        "max_tags": "integer (optional, default: 10) - Số lượng tags tối đa",
+        "language": "string (optional, default: 'vi') - Ngôn ngữ output",
+        "hf_token": "string (optional) - Hugging Face token"
+    }
+    # Description of the fields returned by POST /generate-tags.
+    response_fields = {
+        "generated_tags": "array - Danh sách tags",
+        "primary_category": "string - Danh mục chính",
+        "secondary_categories": "array - Danh mục phụ",
+        "keywords": "array - Keywords SEO",
+        "hashtags": "array - Social media hashtags",
+        "target_audience": "array - Đối tượng mục tiêu",
+        "sentiment": "string - Cảm xúc (positive/neutral/negative)",
+        "confidence_score": "float - Độ tin cậy (0-1)"
+    }
+    # Abbreviated sample request/response pair shown to API consumers.
+    sample_exchange = {
+        "request": {
+            "event_name": "Vietnam Music Festival 2025",
+            "category": "Âm nhạc",
+            "short_description": "Lễ hội âm nhạc quốc tế lớn nhất Việt Nam",
+            "detailed_description": "Sự kiện quy tụ các nghệ sĩ nổi tiếng trong nước và quốc tế..."
+        },
+        "response": {
+            "generated_tags": ["âm nhạc", "festival", "concert", "việt nam", "quốc tế"],
+            "hashtags": ["#VietnamMusicFest", "#MusicFestival2025", "#LiveMusic"]
+        }
+    }
+    generate_tags_doc = {
+        "description": "Generate tags from event information",
+        "request_body": request_fields,
+        "response": response_fields,
+        "example": sample_exchange
+    }
+    return {
+        "status": "running",
+        "service": "Event Tags Generator API",
+        "version": "1.0.0",
+        "description": "Generate tags, keywords, categories automatically from event info",
+        "endpoints": {"POST /generate-tags": generate_tags_doc},
+        "usage": "POST /generate-tags with event information in JSON body"
+    }
+def build_powerful_prompt(
+    event_name: str,
+    category: str,
+    short_desc: str,
+    detailed_desc: str,
+    max_tags: int,
+    language: str
+) -> str:
+    """
+    Assemble the instruction prompt sent to the LLM for a single event.
+    The event fields and the tag limit are interpolated into a fixed template
+    that asks for labelled lines (TAGS, PRIMARY_CATEGORY, SECONDARY_CATEGORIES,
+    KEYWORDS, HASHTAGS, TARGET_AUDIENCE, SENTIMENT).
+    Args:
+        event_name: Name of the event.
+        category: Category supplied by the caller.
+        short_desc: Short description of the event.
+        detailed_desc: Detailed description of the event.
+        max_tags: Maximum number of tags the model is asked for.
+        language: "vi" requests Vietnamese output; any other value requests English.
+    Returns:
+        The complete prompt text.
+    """
+    # Only the exact code "vi" selects Vietnamese; everything else falls back to English.
+    if language == "vi":
+        output_language = "in Vietnamese"
+    else:
+        output_language = "in English"
+    return f"""You are an expert AI system specialized in event marketing, SEO, and content categorization. Your task is to analyze event information and generate comprehensive, relevant tags and metadata.
+**EVENT INFORMATION:**
+• Event Name: {event_name}
+• Primary Category: {category}
+• Short Description: {short_desc}
+• Detailed Description: {detailed_desc}
+**YOUR TASK:**
+Analyze the event information above and generate the following {output_language}:
+1. **TAGS** ({max_tags} tags maximum):
+   - Generate specific, relevant, searchable tags
+   - Include event type, theme, activities, location references
+   - Mix broad and specific tags for better discoverability
+   - Use lowercase, single words or short phrases
+   - Example format: âm nhạc, festival, concert, outdoor, hà nội
+2. **PRIMARY CATEGORY** (1 category):
+   - The main category that best describes this event
+   - Choose from: Âm nhạc, Thể thao, Công nghệ, Nghệ thuật, Ẩm thực, Giáo dục, Kinh doanh, Du lịch, Giải trí, Khác
+3. **SECONDARY CATEGORIES** (2-3 categories):
+   - Additional relevant categories
+   - Help with cross-categorization
+4. **KEYWORDS** (5-8 keywords):
+   - SEO-optimized keywords for search engines
+   - Include long-tail keywords
+   - Example: "lễ hội âm nhạc hà nội", "concert quốc tế việt nam"
+5. **HASHTAGS** (5-7 hashtags):
+   - Social media friendly hashtags
+   - Mix of popular and unique hashtags
+   - Example: #VietnamMusicFest, #LiveMusic, #HanoiEvents
+6. **TARGET AUDIENCE** (2-4 audience groups):
+   - Who would be interested in this event?
+   - Example: Giới trẻ, Gia đình, Dân văn phòng, Sinh viên
+7. **SENTIMENT** (one word):
+   - Overall emotion/feeling: positive, neutral, or negative
+   - Based on event description tone
+**OUTPUT FORMAT (JSON-like structure):**
+TAGS: tag1, tag2, tag3, ...
+PRIMARY_CATEGORY: category_name
+SECONDARY_CATEGORIES: cat1, cat2, cat3
+KEYWORDS: keyword1, keyword2, keyword3, ...
+HASHTAGS: #tag1, #tag2, #tag3, ...
+TARGET_AUDIENCE: audience1, audience2, audience3
+SENTIMENT: positive/neutral/negative
+**IMPORTANT GUIDELINES:**
+- Be specific and relevant to the event
+- Use terms people would actually search for
+- Balance between popular and niche terms
+- Consider SEO and social media best practices
+- Keep tags concise and meaningful
+- Generate output {output_language}
+Now, analyze the event and generate the metadata:"""
+def parse_llm_response(response_text: str, max_tags: int) -> dict:
+    """
+    Parse the labelled, line-oriented LLM reply into a structured dict.
+    Each line of the form ``LABEL: value`` is matched against the known labels
+    (TAGS, PRIMARY_CATEGORY, SECONDARY_CATEGORIES, KEYWORDS, HASHTAGS,
+    TARGET_AUDIENCE, SENTIMENT). Label matching ignores case, markdown
+    emphasis, bullets and list numbers, and treats spaces like underscores,
+    so ``**Tags:**`` or ``2. Primary Category:`` are recognised as well as the
+    plain ``TAGS:`` form. When a label appears more than once, the last
+    occurrence wins.
+    Args:
+        response_text: Raw text returned by the model; ``None`` or empty text
+            yields the default result.
+        max_tags: Maximum number of tags to keep. ``None`` keeps every tag;
+            negative values are treated as 0.
+    Returns:
+        A dict with keys ``generated_tags``, ``primary_category``,
+        ``secondary_categories``, ``keywords``, ``hashtags``,
+        ``target_audience`` and ``sentiment`` (default ``"neutral"``).
+    """
+    result = {
+        "generated_tags": [],
+        "primary_category": "",
+        "secondary_categories": [],
+        "keywords": [],
+        "hashtags": [],
+        "target_audience": [],
+        "sentiment": "neutral"
+    }
+    # Characters models commonly wrap values in (quotes, brackets, markdown).
+    wrappers = "\"'`[]*“”"
+    def clean_label(label: str) -> str:
+        # Keep only A-Z so "**Tags**", "1. TAGS" and "Primary Category"
+        # all normalise to the canonical UPPER_SNAKE label.
+        letters = "".join(ch if "A" <= ch <= "Z" else " " for ch in label.upper())
+        return "_".join(letters.split())
+    def split_items(value: str) -> list:
+        # Comma-separated items with surrounding whitespace/wrappers removed.
+        items = (part.strip().strip(wrappers).strip() for part in value.split(","))
+        return [item for item in items if item]
+    # Labels whose value is a plain comma-separated list copied as-is.
+    list_fields = {
+        "SECONDARY_CATEGORIES": "secondary_categories",
+        "KEYWORDS": "keywords",
+        "TARGET_AUDIENCE": "target_audience",
+    }
+    # A negative slice bound would drop tags from the end instead of limiting.
+    tag_limit = None if max_tags is None else max(max_tags, 0)
+    for raw_line in (response_text or "").strip().split('\n'):
+        label, colon, value = raw_line.partition(":")
+        if not colon:
+            continue
+        key = clean_label(label)
+        # Drop the closing "**" left behind by labels written as "**TAGS:**".
+        value = value.strip().strip("*").strip()
+        if key == "TAGS":
+            tags = [item.lower() for item in split_items(value)]
+            # dict.fromkeys removes duplicates while keeping first-seen order.
+            result["generated_tags"] = list(dict.fromkeys(tags))[:tag_limit]
+        elif key == "PRIMARY_CATEGORY":
+            result["primary_category"] = value.strip(wrappers).strip()
+        elif key in list_fields:
+            result[list_fields[key]] = split_items(value)
+        elif key == "HASHTAGS":
+            hashtags = []
+            for chunk in split_items(value):
+                # "#a #b #c" (space separated) is split into separate tags;
+                # a chunk with at most one "#" is kept whole.
+                pieces = chunk.split() if chunk.count("#") > 1 else [chunk]
+                # Ensure hashtags start with #
+                hashtags.extend(p if p.startswith("#") else f"#{p}" for p in pieces)
+            result["hashtags"] = hashtags
+        elif key == "SENTIMENT":
+            words = "".join(ch if ch.isalpha() else " " for ch in value.lower()).split()
+            found = [s for s in ("positive", "neutral", "negative") if s in words]
+            # Accept only an unambiguous answer; an echoed template such as
+            # "positive/neutral/negative" leaves the default in place.
+            if len(found) == 1:
+                result["sentiment"] = found[0]
+    return result
+def _score_confidence(parsed: dict) -> float:
+    """Return the 0-1 heuristic confidence for a parsed result.
+    Each non-empty field contributes a fixed weight; the weights sum to 1.0
+    and the total is rounded to two decimals.
+    """
+    weights = {
+        "generated_tags": 0.3,
+        "primary_category": 0.2,
+        "keywords": 0.2,
+        "hashtags": 0.15,
+        "target_audience": 0.15,
+    }
+    return round(sum(weight for field, weight in weights.items() if parsed[field]), 2)
+@app.post("/generate-tags", response_model=EventTagsResponse)
+async def generate_tags(request: EventTagsRequest):
+    """
+    Generate comprehensive tags and metadata for an event
+    This endpoint uses advanced LLM prompting to generate:
+    - Relevant tags for searchability
+    - Category classification
+    - SEO keywords
+    - Social media hashtags
+    - Target audience identification
+    - Sentiment analysis
+    **Input:**
+    - event_name: Name of the event
+    - category: Primary category (music, sports, tech, etc.)
+    - short_description: Brief 1-2 sentence description
+    - detailed_description: Full event description with details
+    **Output:**
+    - Structured metadata ready for use in event management system
+    - All fields optimized for search and discovery
+    """
+    # The docstring above is kept verbatim because FastAPI publishes it.
+    # Raises HTTPException: 401 when no token is available, 422 for a negative
+    # max_tags, 500 when every model fails or an unexpected error occurs.
+    # Function-scope imports keep this block self-contained.
+    import asyncio
+    import functools
+    import time
+    try:
+        # perf_counter is monotonic, so the elapsed time cannot be skewed by
+        # wall-clock adjustments.
+        started = time.perf_counter()
+        # Get token: the per-request value wins over the environment default.
+        token = request.hf_token or hf_token
+        if not token:
+            raise HTTPException(
+                status_code=401,
+                detail="HUGGINGFACE_TOKEN required. Set environment variable or pass in request body."
+            )
+        # Both fields are Optional, so an explicit null falls back to the
+        # declared defaults instead of leaking "None" into the prompt.
+        max_tags = 10 if request.max_tags is None else request.max_tags
+        if max_tags < 0:
+            raise HTTPException(
+                status_code=422,
+                detail="max_tags must be a non-negative integer"
+            )
+        language = request.language or "vi"
+        # Build powerful prompt
+        prompt = build_powerful_prompt(
+            event_name=request.event_name,
+            category=request.category,
+            short_desc=request.short_description,
+            detailed_desc=request.detailed_description,
+            max_tags=max_tags,
+            language=language
+        )
+        # Initialize HF client
+        client = InferenceClient(token=token)
+        # Try multiple models for best results
+        models_to_try = [
+            "microsoft/Phi-3-mini-4k-instruct",
+            "mistralai/Mistral-7B-Instruct-v0.3",
+            "HuggingFaceH4/zephyr-7b-beta",
+            "meta-llama/Llama-3.2-3B-Instruct"
+        ]
+        loop = asyncio.get_running_loop()
+        llm_response = ""
+        model_used = ""
+        last_error = None
+        for model_name in models_to_try:
+            generate = functools.partial(
+                client.text_generation,
+                prompt,
+                model=model_name,
+                max_new_tokens=800,
+                temperature=0.7,
+                top_p=0.9,
+                do_sample=True,
+                return_full_text=False
+            )
+            try:
+                print(f"Trying model: {model_name}")
+                # The client call is synchronous; running it in the default
+                # executor keeps the event loop free for other requests.
+                candidate = await loop.run_in_executor(None, generate)
+            except Exception as model_error:
+                print(f"✗ Failed with {model_name}: {str(model_error)}")
+                last_error = model_error
+                continue
+            # Only a reply longer than 50 characters counts as usable; a
+            # shorter one is never carried over to later iterations.
+            if candidate and len(candidate.strip()) > 50:
+                llm_response = candidate
+                model_used = model_name
+                print(f"✓ Success with {model_name}")
+                break
+            last_error = ValueError(f"{model_name} returned an empty or too-short response")
+        # Check if generation succeeded
+        if not model_used:
+            raise HTTPException(
+                status_code=500,
+                detail=f"All models failed. Last error: {str(last_error)}\n\nPlease check:\n1. Token has correct permissions\n2. Token is valid and not expired\n3. Try regenerating token"
+            )
+        # Parse LLM response into structured format
+        parsed_result = parse_llm_response(llm_response, max_tags)
+        # Calculate confidence score (basic heuristic)
+        confidence = _score_confidence(parsed_result)
+        generation_time = time.perf_counter() - started
+        # Build response
+        return EventTagsResponse(
+            event_name=request.event_name,
+            generated_tags=parsed_result["generated_tags"],
+            primary_category=parsed_result["primary_category"],
+            secondary_categories=parsed_result["secondary_categories"],
+            keywords=parsed_result["keywords"],
+            hashtags=parsed_result["hashtags"],
+            target_audience=parsed_result["target_audience"],
+            sentiment=parsed_result["sentiment"],
+            confidence_score=confidence,
+            generation_time=f"{generation_time:.2f}s",
+            model_used=model_used.split('/')[-1] if model_used else "unknown"
+        )
+    except HTTPException:
+        # Deliberate HTTP errors pass through unchanged.
+        raise
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Error generating tags: {str(e)}"
+        )
+@app.post("/generate-tags/batch")
+async def generate_tags_batch(events: List[EventTagsRequest]):
+    """
+    Batch generate tags for multiple events
+    Useful for bulk processing or migrating existing events
+    """
+    # Events are processed one after another; a failure is recorded for that
+    # event only and does not stop the rest of the batch.
+    outcomes = []
+    succeeded = 0
+    for item in events:
+        try:
+            generated = await generate_tags(item)
+        except Exception as exc:
+            outcome = {
+                "event_name": item.event_name,
+                "success": False,
+                "error": str(exc)
+            }
+        else:
+            outcome = {
+                "event_name": item.event_name,
+                "success": True,
+                "data": generated
+            }
+            succeeded += 1
+        outcomes.append(outcome)
+    # Every event yields exactly one outcome, so failures are the remainder.
+    return {
+        "total": len(events),
+        "successful": succeeded,
+        "failed": len(outcomes) - succeeded,
+        "results": outcomes
+    }
+if __name__ == "__main__":
+    # Script entry point: serve the app with uvicorn when the file is run directly.
+    import uvicorn
+    server_options = {
+        "host": "0.0.0.0",
+        "port": 8001,  # Different port from main API
+        "log_level": "info",
+    }
+    uvicorn.run(app, **server_options)