# chatbot_test / app.py
# Source: Hugging Face Space by minh9972t12 (commit 2d6bd6e, "Update app.py")
"""
Event Tags Generator - AI Chatbot for automatic tag generation
Generates relevant tags, keywords, and categories from event information
"""
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, List
from datetime import datetime
import os
from huggingface_hub import InferenceClient
import uvicorn
# Initialize FastAPI application with metadata shown in the generated docs.
app = FastAPI(
    title="Event Tags Generator API",
    description="AI-powered automatic tag generation for events using LLM",
    version="1.0.0"
)

# CORS middleware: wide open so any frontend origin can call the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers per the CORS spec (wildcard origin is not allowed with
# credentials) — confirm whether credentialed requests are actually needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Server-wide Hugging Face token; individual requests may override it via
# the optional `hf_token` field in the request body.
hf_token = os.getenv("HUGGINGFACE_TOKEN")
if hf_token:
    print("✓ Hugging Face token configured")
else:
    print("⚠ Warning: No HUGGINGFACE_TOKEN found. Set it in environment variable.")
# Pydantic models
class EventTagsRequest(BaseModel):
    """Input payload for /generate-tags: raw event info plus generation options."""
    event_name: str
    category: str                   # primary category as supplied by the caller
    short_description: str          # brief 1-2 sentence summary
    detailed_description: str       # full event description
    max_tags: Optional[int] = 10    # cap on number of generated tags
    language: Optional[str] = "vi"  # vi = Vietnamese, en = English
    hf_token: Optional[str] = None  # per-request Hugging Face token override
class EventTagsResponse(BaseModel):
    """Structured metadata returned by /generate-tags."""
    event_name: str
    generated_tags: List[str]       # searchable tags, lowercased
    primary_category: str
    secondary_categories: List[str]
    keywords: List[str]             # SEO-oriented keywords
    hashtags: List[str]             # each entry prefixed with '#'
    target_audience: List[str]
    sentiment: str                  # positive / neutral / negative
    confidence_score: float         # heuristic 0-1 completeness score
    generation_time: str            # elapsed wall time, e.g. "1.23s"
    model_used: str                 # short model name, or "unknown"
@app.get("/")
async def root():
"""API Information"""
return {
"status": "running",
"service": "Event Tags Generator API",
"version": "1.0.0",
"description": "Generate tags, keywords, categories automatically from event info",
"endpoints": {
"POST /generate-tags": {
"description": "Generate tags from event information",
"request_body": {
"event_name": "string - Tên sự kiện",
"category": "string - Danh mục (âm nhạc, thể thao, công nghệ...)",
"short_description": "string - Mô tả ngắn (1-2 câu)",
"detailed_description": "string - Mô tả chi tiết",
"max_tags": "integer (optional, default: 10) - Số lượng tags tối đa",
"language": "string (optional, default: 'vi') - Ngôn ngữ output",
"hf_token": "string (optional) - Hugging Face token"
},
"response": {
"generated_tags": "array - Danh sách tags",
"primary_category": "string - Danh mục chính",
"secondary_categories": "array - Danh mục phụ",
"keywords": "array - Keywords SEO",
"hashtags": "array - Social media hashtags",
"target_audience": "array - Đối tượng mục tiêu",
"sentiment": "string - Cảm xúc (positive/neutral/negative)",
"confidence_score": "float - Độ tin cậy (0-1)"
},
"example": {
"request": {
"event_name": "Vietnam Music Festival 2025",
"category": "Âm nhạc",
"short_description": "Lễ hội âm nhạc quốc tế lớn nhất Việt Nam",
"detailed_description": "Sự kiện quy tụ các nghệ sĩ nổi tiếng trong nước và quốc tế..."
},
"response": {
"generated_tags": ["âm nhạc", "festival", "concert", "việt nam", "quốc tế"],
"hashtags": ["#VietnamMusicFest", "#MusicFestival2025", "#LiveMusic"]
}
}
}
},
"usage": "POST /generate-tags with event information in JSON body"
}
def build_powerful_prompt(
    event_name: str,
    category: str,
    short_desc: str,
    detailed_desc: str,
    max_tags: int,
    language: str
) -> str:
    """
    Compose the instruction prompt sent to the LLM.

    Embeds the event fields and requests a fixed, line-oriented output format
    (TAGS:, PRIMARY_CATEGORY:, ...) that parse_llm_response reads back.
    """
    # Only "vi" selects Vietnamese output; anything else falls back to English.
    output_language = "in Vietnamese" if language == "vi" else "in English"
    return f"""You are an expert AI system specialized in event marketing, SEO, and content categorization. Your task is to analyze event information and generate comprehensive, relevant tags and metadata.
**EVENT INFORMATION:**
• Event Name: {event_name}
• Primary Category: {category}
• Short Description: {short_desc}
• Detailed Description: {detailed_desc}
**YOUR TASK:**
Analyze the event information above and generate the following {output_language}:
1. **TAGS** ({max_tags} tags maximum):
   - Generate specific, relevant, searchable tags
   - Include event type, theme, activities, location references
   - Mix broad and specific tags for better discoverability
   - Use lowercase, single words or short phrases
   - Example format: âm nhạc, festival, concert, outdoor, hà nội
2. **PRIMARY CATEGORY** (1 category):
   - The main category that best describes this event
   - Choose from: Âm nhạc, Thể thao, Công nghệ, Nghệ thuật, Ẩm thực, Giáo dục, Kinh doanh, Du lịch, Giải trí, Khác
3. **SECONDARY CATEGORIES** (2-3 categories):
   - Additional relevant categories
   - Help with cross-categorization
4. **KEYWORDS** (5-8 keywords):
   - SEO-optimized keywords for search engines
   - Include long-tail keywords
   - Example: "lễ hội âm nhạc hà nội", "concert quốc tế việt nam"
5. **HASHTAGS** (5-7 hashtags):
   - Social media friendly hashtags
   - Mix of popular and unique hashtags
   - Example: #VietnamMusicFest, #LiveMusic, #HanoiEvents
6. **TARGET AUDIENCE** (2-4 audience groups):
   - Who would be interested in this event?
   - Example: Giới trẻ, Gia đình, Dân văn phòng, Sinh viên
7. **SENTIMENT** (one word):
   - Overall emotion/feeling: positive, neutral, or negative
   - Based on event description tone
**OUTPUT FORMAT (JSON-like structure):**
TAGS: tag1, tag2, tag3, ...
PRIMARY_CATEGORY: category_name
SECONDARY_CATEGORIES: cat1, cat2, cat3
KEYWORDS: keyword1, keyword2, keyword3, ...
HASHTAGS: #tag1, #tag2, #tag3, ...
TARGET_AUDIENCE: audience1, audience2, audience3
SENTIMENT: positive/neutral/negative
**IMPORTANT GUIDELINES:**
- Be specific and relevant to the event
- Use terms people would actually search for
- Balance between popular and niche terms
- Consider SEO and social media best practices
- Keep tags concise and meaningful
- Generate output {output_language}
Now, analyze the event and generate the metadata:"""
def parse_llm_response(response_text: str, max_tags: int) -> dict:
    """
    Parse the LLM's line-oriented reply into a structured dict.

    Expects lines of the form ``LABEL: comma, separated, values``. Labels are
    matched case-insensitively, and common markdown decoration that LLMs add
    around labels (``**TAGS:**``, ``- TAGS:``, ``• TAGS:``) is stripped before
    matching, so decorated replies no longer parse as empty.

    Args:
        response_text: raw generated text from the model.
        max_tags: maximum number of entries kept in ``generated_tags``.

    Returns:
        dict with keys generated_tags, primary_category, secondary_categories,
        keywords, hashtags, target_audience, sentiment. Unrecognized or missing
        sections keep their defaults (empty list/string, sentiment "neutral").
    """
    result = {
        "generated_tags": [],
        "primary_category": "",
        "secondary_categories": [],
        "keywords": [],
        "hashtags": [],
        "target_audience": [],
        "sentiment": "neutral"
    }

    def _value_after_colon(line: str) -> str:
        # Everything after the first ':' is the payload.
        return line.split(':', 1)[1].strip()

    def _csv_list(text: str) -> List[str]:
        # Comma-separated values, trimmed, empties dropped.
        return [item.strip() for item in text.split(',') if item.strip()]

    for raw in response_text.strip().split('\n'):
        # Normalize markdown decoration so the label prefix match still fires:
        # drop bold markers entirely, then leading bullet characters.
        line = raw.strip().replace('**', '').lstrip('*-• ').strip()
        if not line:
            continue
        upper = line.upper()
        if upper.startswith('TAGS:'):
            tags = [t.lower() for t in _csv_list(_value_after_colon(line))]
            result["generated_tags"] = tags[:max_tags]
        elif upper.startswith('PRIMARY_CATEGORY:'):
            result["primary_category"] = _value_after_colon(line)
        elif upper.startswith('SECONDARY_CATEGORIES:'):
            result["secondary_categories"] = _csv_list(_value_after_colon(line))
        elif upper.startswith('KEYWORDS:'):
            result["keywords"] = _csv_list(_value_after_colon(line))
        elif upper.startswith('HASHTAGS:'):
            hashtags = _csv_list(_value_after_colon(line))
            # Ensure every hashtag carries the leading '#'.
            result["hashtags"] = [h if h.startswith('#') else f"#{h}" for h in hashtags]
        elif upper.startswith('TARGET_AUDIENCE:'):
            result["target_audience"] = _csv_list(_value_after_colon(line))
        elif upper.startswith('SENTIMENT:'):
            sentiment = _value_after_colon(line).lower()
            # Only accept the three expected values; otherwise keep "neutral".
            if sentiment in ['positive', 'neutral', 'negative']:
                result["sentiment"] = sentiment
    return result
@app.post("/generate-tags", response_model=EventTagsResponse)
async def generate_tags(request: EventTagsRequest):
"""
Generate comprehensive tags and metadata for an event
This endpoint uses advanced LLM prompting to generate:
- Relevant tags for searchability
- Category classification
- SEO keywords
- Social media hashtags
- Target audience identification
- Sentiment analysis
**Input:**
- event_name: Name of the event
- category: Primary category (music, sports, tech, etc.)
- short_description: Brief 1-2 sentence description
- detailed_description: Full event description with details
**Output:**
- Structured metadata ready for use in event management system
- All fields optimized for search and discovery
"""
try:
start_time = datetime.utcnow()
# Get token
token = request.hf_token or hf_token
if not token:
raise HTTPException(
status_code=401,
detail="HUGGINGFACE_TOKEN required. Set environment variable or pass in request body."
)
# Build powerful prompt
prompt = build_powerful_prompt(
event_name=request.event_name,
category=request.category,
short_desc=request.short_description,
detailed_desc=request.detailed_description,
max_tags=request.max_tags,
language=request.language
)
# Initialize HF client
client = InferenceClient(token=token)
# Try multiple models for best results
models_to_try = [
"microsoft/Phi-3-mini-4k-instruct",
"mistralai/Mistral-7B-Instruct-v0.3",
"HuggingFaceH4/zephyr-7b-beta",
"meta-llama/Llama-3.2-3B-Instruct"
]
llm_response = ""
model_used = ""
last_error = None
for model_name in models_to_try:
try:
print(f"Trying model: {model_name}")
# Generate with LLM
llm_response = client.text_generation(
prompt,
model=model_name,
max_new_tokens=800,
temperature=0.7,
top_p=0.9,
do_sample=True,
return_full_text=False
)
if llm_response and len(llm_response.strip()) > 50:
model_used = model_name
print(f"✓ Success with {model_name}")
break
except Exception as model_error:
print(f"✗ Failed with {model_name}: {str(model_error)}")
last_error = model_error
continue
# Check if generation succeeded
if not llm_response or len(llm_response.strip()) < 50:
raise HTTPException(
status_code=500,
detail=f"All models failed. Last error: {str(last_error)}\n\nPlease check:\n1. Token has correct permissions\n2. Token is valid and not expired\n3. Try regenerating token"
)
# Parse LLM response into structured format
parsed_result = parse_llm_response(llm_response, request.max_tags)
# Calculate confidence score (basic heuristic)
confidence = 0.0
if parsed_result["generated_tags"]:
confidence += 0.3
if parsed_result["primary_category"]:
confidence += 0.2
if parsed_result["keywords"]:
confidence += 0.2
if parsed_result["hashtags"]:
confidence += 0.15
if parsed_result["target_audience"]:
confidence += 0.15
end_time = datetime.utcnow()
generation_time = (end_time - start_time).total_seconds()
# Build response
return EventTagsResponse(
event_name=request.event_name,
generated_tags=parsed_result["generated_tags"],
primary_category=parsed_result["primary_category"],
secondary_categories=parsed_result["secondary_categories"],
keywords=parsed_result["keywords"],
hashtags=parsed_result["hashtags"],
target_audience=parsed_result["target_audience"],
sentiment=parsed_result["sentiment"],
confidence_score=round(confidence, 2),
generation_time=f"{generation_time:.2f}s",
model_used=model_used.split('/')[-1] if model_used else "unknown"
)
except HTTPException:
raise
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Error generating tags: {str(e)}"
)
@app.post("/generate-tags/batch")
async def generate_tags_batch(events: List[EventTagsRequest]):
"""
Batch generate tags for multiple events
Useful for bulk processing or migrating existing events
"""
results = []
for event in events:
try:
result = await generate_tags(event)
results.append({
"event_name": event.event_name,
"success": True,
"data": result
})
except Exception as e:
results.append({
"event_name": event.event_name,
"success": False,
"error": str(e)
})
return {
"total": len(events),
"successful": sum(1 for r in results if r["success"]),
"failed": sum(1 for r in results if not r["success"]),
"results": results
}
if __name__ == "__main__":
    # Local/dev entry point. PORT defaults to 7860 (the Hugging Face Spaces
    # convention). `os` is already imported at module top, so the redundant
    # in-block `import os` was removed.
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
        reload=False,
        log_level="info"
    )