# Spaces: Running
"""
Event Tags Generator - AI Chatbot for automatic tag generation
Generates relevant tags, keywords, and categories from event information
"""
import asyncio
import functools
import os
import re
import time
from datetime import datetime
from typing import List, Optional

import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from huggingface_hub import InferenceClient
from pydantic import BaseModel
# Initialize FastAPI
app = FastAPI(
    title="Event Tags Generator API",
    description="AI-powered automatic tag generation for events using LLM",
    version="1.0.0",
)

# CORS middleware.
# A wildcard origin must not be combined with credentialed requests, so
# credentials are only enabled when an explicit origin list is configured via
# CORS_ALLOW_ORIGINS (comma-separated). The default stays "any origin".
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Hugging Face token: server-wide default, read once at import time.
# A request may still supply its own token in the request body.
hf_token = os.getenv("HUGGINGFACE_TOKEN")
if hf_token:
    print("✓ Hugging Face token configured")
else:
    print("⚠ Warning: No HUGGINGFACE_TOKEN found. Set it in environment variable.")
# Pydantic models
class EventTagsRequest(BaseModel):
    """Request body describing one event to generate tags for."""

    # Free-text event fields; all four are interpolated into the LLM prompt.
    event_name: str
    category: str
    short_description: str
    detailed_description: str
    # Upper bound on returned tags; also quoted in the prompt.
    max_tags: Optional[int] = 10
    language: Optional[str] = "vi"  # vi = Vietnamese, en = English
    # Per-request Hugging Face token; takes precedence over the
    # HUGGINGFACE_TOKEN environment variable when provided.
    hf_token: Optional[str] = None
class EventTagsResponse(BaseModel):
    """Structured metadata returned for one event."""

    # Echo of the request's event name.
    event_name: str
    # Fields parsed out of the LLM's text answer.
    generated_tags: List[str]
    primary_category: str
    secondary_categories: List[str]
    keywords: List[str]
    hashtags: List[str]
    target_audience: List[str]
    sentiment: str
    # Heuristic score based on which parsed fields are non-empty.
    confidence_score: float
    # Elapsed time formatted as "<seconds>s".
    generation_time: str
    # Short model name (the part after the last "/"), or "unknown".
    model_used: str
# NOTE(review): no route decorator is visible on this handler in this view;
# confirm it is registered on `app`.
async def root():
    """Return static API information.

    Returns:
        A dict with the service status, name, version and description, an
        ``endpoints`` map documenting ``POST /generate-tags`` (request body,
        response fields and an example), and a one-line usage hint.
    """
    return {
        "status": "running",
        "service": "Event Tags Generator API",
        "version": "1.0.0",
        "description": "Generate tags, keywords, categories automatically from event info",
        "endpoints": {
            "POST /generate-tags": {
                "description": "Generate tags from event information",
                "request_body": {
                    "event_name": "string - Tên sự kiện",
                    "category": "string - Danh mục (âm nhạc, thể thao, công nghệ...)",
                    "short_description": "string - Mô tả ngắn (1-2 câu)",
                    "detailed_description": "string - Mô tả chi tiết",
                    "max_tags": "integer (optional, default: 10) - Số lượng tags tối đa",
                    "language": "string (optional, default: 'vi') - Ngôn ngữ output",
                    "hf_token": "string (optional) - Hugging Face token",
                },
                "response": {
                    "generated_tags": "array - Danh sách tags",
                    "primary_category": "string - Danh mục chính",
                    "secondary_categories": "array - Danh mục phụ",
                    "keywords": "array - Keywords SEO",
                    "hashtags": "array - Social media hashtags",
                    "target_audience": "array - Đối tượng mục tiêu",
                    "sentiment": "string - Cảm xúc (positive/neutral/negative)",
                    "confidence_score": "float - Độ tin cậy (0-1)",
                },
                "example": {
                    "request": {
                        "event_name": "Vietnam Music Festival 2025",
                        "category": "Âm nhạc",
                        "short_description": "Lễ hội âm nhạc quốc tế lớn nhất Việt Nam",
                        "detailed_description": "Sự kiện quy tụ các nghệ sĩ nổi tiếng trong nước và quốc tế...",
                    },
                    "response": {
                        "generated_tags": ["âm nhạc", "festival", "concert", "việt nam", "quốc tế"],
                        "hashtags": ["#VietnamMusicFest", "#MusicFestival2025", "#LiveMusic"],
                    },
                },
            }
        },
        "usage": "POST /generate-tags with event information in JSON body",
    }
def build_powerful_prompt(
    event_name: str,
    category: str,
    short_desc: str,
    detailed_desc: str,
    max_tags: int,
    language: str
) -> str:
    """Build the structured LLM prompt for tag/metadata generation.

    Args:
        event_name: Event name, inserted verbatim into the prompt.
        category: Caller-supplied primary category.
        short_desc: Short event description.
        detailed_desc: Detailed event description.
        max_tags: Maximum number of tags the model is asked for.
        language: ``"vi"`` requests Vietnamese output; any other value
            requests English.

    Returns:
        The full prompt text. It asks for one ``LABEL: value`` line per
        field (TAGS, PRIMARY_CATEGORY, SECONDARY_CATEGORIES, KEYWORDS,
        HASHTAGS, TARGET_AUDIENCE, SENTIMENT), which is the shape
        ``parse_llm_response`` reads.
    """
    lang_instruction = "in Vietnamese" if language == "vi" else "in English"
    # The output-format heading states plain text explicitly: the response is
    # parsed line by line, so a JSON answer would yield no fields at all.
    prompt = f"""You are an expert AI system specialized in event marketing, SEO, and content categorization. Your task is to analyze event information and generate comprehensive, relevant tags and metadata.
**EVENT INFORMATION:**
• Event Name: {event_name}
• Primary Category: {category}
• Short Description: {short_desc}
• Detailed Description: {detailed_desc}
**YOUR TASK:**
Analyze the event information above and generate the following {lang_instruction}:
1. **TAGS** ({max_tags} tags maximum):
- Generate specific, relevant, searchable tags
- Include event type, theme, activities, location references
- Mix broad and specific tags for better discoverability
- Use lowercase, single words or short phrases
- Example format: âm nhạc, festival, concert, outdoor, hà nội
2. **PRIMARY CATEGORY** (1 category):
- The main category that best describes this event
- Choose from: Âm nhạc, Thể thao, Công nghệ, Nghệ thuật, Ẩm thực, Giáo dục, Kinh doanh, Du lịch, Giải trí, Khác
3. **SECONDARY CATEGORIES** (2-3 categories):
- Additional relevant categories
- Help with cross-categorization
4. **KEYWORDS** (5-8 keywords):
- SEO-optimized keywords for search engines
- Include long-tail keywords
- Example: "lễ hội âm nhạc hà nội", "concert quốc tế việt nam"
5. **HASHTAGS** (5-7 hashtags):
- Social media friendly hashtags
- Mix of popular and unique hashtags
- Example: #VietnamMusicFest, #LiveMusic, #HanoiEvents
6. **TARGET AUDIENCE** (2-4 audience groups):
- Who would be interested in this event?
- Example: Giới trẻ, Gia đình, Dân văn phòng, Sinh viên
7. **SENTIMENT** (one word):
- Overall emotion/feeling: positive, neutral, or negative
- Based on event description tone
**OUTPUT FORMAT (plain text, one "LABEL: value" line per field; no JSON, no markdown):**
TAGS: tag1, tag2, tag3, ...
PRIMARY_CATEGORY: category_name
SECONDARY_CATEGORIES: cat1, cat2, cat3
KEYWORDS: keyword1, keyword2, keyword3, ...
HASHTAGS: #tag1, #tag2, #tag3, ...
TARGET_AUDIENCE: audience1, audience2, audience3
SENTIMENT: positive/neutral/negative
**IMPORTANT GUIDELINES:**
- Be specific and relevant to the event
- Use terms people would actually search for
- Balance between popular and niche terms
- Consider SEO and social media best practices
- Keep tags concise and meaningful
- Generate output {lang_instruction}
Now, analyze the event and generate the metadata:"""
    return prompt
# Matches "LABEL: value", tolerating decoration LLMs commonly add around the
# label: list bullets, "1." numbering, quote markers and markdown emphasis.
_LABEL_LINE = re.compile(
    r"^[\s>*#\-•\d.)]*"        # leading bullets / numbering / emphasis
    r"([A-Za-z][A-Za-z_ ]*?)"  # the label itself
    r"\s*\**\s*:\s*\**\s*"     # colon, optionally wrapped in '**'
    r"(.*)$"                   # the value
)

# Labels whose value is a plain comma-separated list, mapped to result keys.
_LIST_FIELDS = {
    "SECONDARY_CATEGORIES": "secondary_categories",
    "KEYWORDS": "keywords",
    "TARGET_AUDIENCE": "target_audience",
}

_SENTIMENTS = ("positive", "neutral", "negative")


def _clean_item(text: str) -> str:
    """Trim whitespace plus wrapping quotes, backticks and emphasis marks."""
    return text.strip().strip("\"'`*").strip()


def _split_items(value: str) -> list:
    """Split a comma-separated value into cleaned, non-empty items."""
    unwrapped = value.strip().strip("[]")
    cleaned = (_clean_item(part) for part in unwrapped.split(","))
    return [item for item in cleaned if item]


def parse_llm_response(response_text: str, max_tags: int) -> dict:
    """Parse the LLM's line-oriented answer into a structured dict.

    Each line of the form ``LABEL: value`` is examined. Labels are matched
    case-insensitively, with spaces and underscores treated alike, and may
    carry bullets, numbering or markdown emphasis. Unrecognized lines are
    ignored. If a label appears more than once, the last occurrence wins.

    Args:
        response_text: Raw text returned by the model; empty or ``None``
            yields the defaults.
        max_tags: Maximum number of tags to keep. ``None`` keeps all tags;
            negative values are treated as 0.

    Returns:
        A dict with keys ``generated_tags`` (lower-cased, de-duplicated,
        truncated to ``max_tags``), ``primary_category``,
        ``secondary_categories``, ``keywords``, ``hashtags`` (each prefixed
        with ``#``), ``target_audience`` and ``sentiment`` (one of
        positive/neutral/negative; defaults to ``"neutral"``).
    """
    result = {
        "generated_tags": [],
        "primary_category": "",
        "secondary_categories": [],
        "keywords": [],
        "hashtags": [],
        "target_audience": [],
        "sentiment": "neutral",
    }
    # A negative limit would otherwise slice from the end of the list.
    tag_limit = None if max_tags is None else max(0, max_tags)

    for raw_line in (response_text or "").splitlines():
        match = _LABEL_LINE.match(raw_line)
        if match is None:
            continue
        # Normalize "Primary Category" / "primary_category" -> "PRIMARY_CATEGORY".
        label = "_".join(match.group(1).upper().replace("_", " ").split())
        value = match.group(2)

        if label == "TAGS":
            lowered = (item.lower() for item in _split_items(value))
            # dict.fromkeys de-duplicates while preserving first-seen order.
            result["generated_tags"] = list(dict.fromkeys(lowered))[:tag_limit]
        elif label == "PRIMARY_CATEGORY":
            result["primary_category"] = _clean_item(value)
        elif label in _LIST_FIELDS:
            result[_LIST_FIELDS[label]] = _split_items(value)
        elif label == "HASHTAGS":
            # Ensure hashtags start with #
            result["hashtags"] = [
                tag if tag.startswith("#") else f"#{tag}"
                for tag in _split_items(value)
            ]
        elif label == "SENTIMENT":
            sentiment = _clean_item(value).lower().rstrip(".!")
            if sentiment in _SENTIMENTS:
                result["sentiment"] = sentiment
    return result
# NOTE(review): no route decorator is visible on this handler in this view;
# the endpoint map returned by root() advertises "POST /generate-tags" —
# confirm it is registered on `app`.
async def generate_tags(request: EventTagsRequest):
    """
    Generate comprehensive tags and metadata for an event

    This endpoint uses advanced LLM prompting to generate:
    - Relevant tags for searchability
    - Category classification
    - SEO keywords
    - Social media hashtags
    - Target audience identification
    - Sentiment analysis

    **Input:**
    - event_name: Name of the event
    - category: Primary category (music, sports, tech, etc.)
    - short_description: Brief 1-2 sentence description
    - detailed_description: Full event description with details

    **Output:**
    - Structured metadata ready for use in event management system
    - All fields optimized for search and discovery

    Raises:
        HTTPException: 401 when neither the request nor the environment
            provides a Hugging Face token; 500 when every candidate model
            fails or returns too little text, or on any unexpected error.
    """
    # Responses at or below this many characters are treated as failures.
    min_response_chars = 50
    try:
        # Monotonic clock: elapsed time must not be affected by clock changes.
        started = time.perf_counter()

        # Get token (the per-request token wins over the environment one)
        token = request.hf_token or hf_token
        if not token:
            raise HTTPException(
                status_code=401,
                detail="HUGGINGFACE_TOKEN required. Set environment variable or pass in request body."
            )

        # The request model allows explicit nulls; fall back to its defaults.
        max_tags = 10 if request.max_tags is None else max(1, request.max_tags)
        language = request.language or "vi"

        # Build powerful prompt
        prompt = build_powerful_prompt(
            event_name=request.event_name,
            category=request.category,
            short_desc=request.short_description,
            detailed_desc=request.detailed_description,
            max_tags=max_tags,
            language=language
        )

        # Initialize HF client
        client = InferenceClient(token=token)

        # Try multiple models in order; the first usable answer wins.
        models_to_try = [
            "microsoft/Phi-3-mini-4k-instruct",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "HuggingFaceH4/zephyr-7b-beta",
            "meta-llama/Llama-3.2-3B-Instruct"
        ]
        llm_response = ""
        model_used = ""
        last_error = None
        loop = asyncio.get_running_loop()
        for model_name in models_to_try:
            try:
                print(f"Trying model: {model_name}")
                # text_generation is a blocking call; run it in the default
                # executor so other requests are served while it waits.
                candidate = await loop.run_in_executor(
                    None,
                    functools.partial(
                        client.text_generation,
                        prompt,
                        model=model_name,
                        max_new_tokens=800,
                        temperature=0.7,
                        top_p=0.9,
                        do_sample=True,
                        return_full_text=False,
                    ),
                )
                if not candidate or len(candidate.strip()) <= min_response_chars:
                    # Record a real reason so the final error is never "None".
                    raise ValueError("model returned an empty or too-short response")
            except Exception as model_error:
                print(f"✗ Failed with {model_name}: {str(model_error)}")
                last_error = model_error
                continue
            llm_response = candidate
            model_used = model_name
            print(f"✓ Success with {model_name}")
            break

        # Check if generation succeeded (model_used is set only on success)
        if not model_used:
            raise HTTPException(
                status_code=500,
                detail=f"All models failed. Last error: {str(last_error)}\n\nPlease check:\n1. Token has correct permissions\n2. Token is valid and not expired\n3. Try regenerating token"
            )

        # Parse LLM response into structured format
        parsed_result = parse_llm_response(llm_response, max_tags)

        # Calculate confidence score (basic heuristic): each non-empty field
        # contributes a fixed weight; the weights sum to 1.0.
        field_weights = (
            ("generated_tags", 0.3),
            ("primary_category", 0.2),
            ("keywords", 0.2),
            ("hashtags", 0.15),
            ("target_audience", 0.15),
        )
        confidence = sum(
            weight for field, weight in field_weights if parsed_result[field]
        )

        generation_time = time.perf_counter() - started

        # Build response
        return EventTagsResponse(
            event_name=request.event_name,
            generated_tags=parsed_result["generated_tags"],
            primary_category=parsed_result["primary_category"],
            secondary_categories=parsed_result["secondary_categories"],
            keywords=parsed_result["keywords"],
            hashtags=parsed_result["hashtags"],
            target_audience=parsed_result["target_audience"],
            sentiment=parsed_result["sentiment"],
            confidence_score=round(confidence, 2),
            generation_time=f"{generation_time:.2f}s",
            model_used=model_used.split('/')[-1] if model_used else "unknown"
        )
    except HTTPException:
        # Already carries the intended status code; pass through untouched.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error generating tags: {str(e)}"
        ) from e
# NOTE(review): no route decorator is visible on this handler in this view;
# confirm it is registered on `app` and under which path.
async def generate_tags_batch(events: List[EventTagsRequest]):
    """
    Batch generate tags for multiple events

    Useful for bulk processing or migrating existing events

    Events are processed one at a time, in order. A failure for one event is
    recorded in its result entry and does not stop the batch.

    Returns:
        A dict with ``total``, ``successful`` and ``failed`` counts and a
        ``results`` list holding one entry per event: ``event_name``,
        ``success`` and either ``data`` or ``error``.
    """
    results = []
    for event in events:
        try:
            result = await generate_tags(event)
        except Exception as exc:
            # Prefer HTTPException.detail: str() of an HTTPException can be
            # empty depending on the installed Starlette version.
            results.append({
                "event_name": event.event_name,
                "success": False,
                "error": str(getattr(exc, "detail", "") or exc)
            })
        else:
            results.append({
                "event_name": event.event_name,
                "success": True,
                "data": result
            })

    successful = sum(1 for entry in results if entry["success"])
    return {
        "total": len(events),
        "successful": successful,
        "failed": len(results) - successful,
        "results": results
    }
if __name__ == "__main__":
    # Pass the app object rather than the "app:app" import string, so startup
    # does not depend on this file being named app.py. An import string is
    # only needed for reload/workers, which are not used here.
    uvicorn.run(
        app,
        host="0.0.0.0",
        # PORT may be supplied by the hosting environment; 7860 is the fallback.
        port=int(os.environ.get("PORT", 7860)),
        reload=False,
        log_level="info"
    )