# chatbot_test / app.py
# Source: Hugging Face Space by minh9972t12 (commit 2d6bd6e, "Update app.py")
"""
Event Tags Generator - AI Chatbot for automatic tag generation
Generates relevant tags, keywords, and categories from event information
"""
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, List
from datetime import datetime
import os
from huggingface_hub import InferenceClient
import uvicorn
# Initialize FastAPI application with metadata shown in the generated docs.
app = FastAPI(
    title="Event Tags Generator API",
    description="AI-powered automatic tag generation for events using LLM",
    version="1.0.0"
)

# CORS middleware: wide open so any frontend origin can call the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers per the CORS spec (wildcard origin is not allowed with
# credentials) — confirm whether credentialed requests are actually needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Server-wide Hugging Face token; individual requests may override it via
# the optional `hf_token` field in the request body.
hf_token = os.getenv("HUGGINGFACE_TOKEN")
if hf_token:
    print("✓ Hugging Face token configured")
else:
    print("⚠ Warning: No HUGGINGFACE_TOKEN found. Set it in environment variable.")
# Pydantic models
class EventTagsRequest(BaseModel):
    """Input payload for /generate-tags: raw event info plus generation options."""
    event_name: str
    category: str                   # primary category as supplied by the caller
    short_description: str          # brief 1-2 sentence summary
    detailed_description: str       # full event description
    max_tags: Optional[int] = 10    # cap on number of generated tags
    language: Optional[str] = "vi"  # vi = Vietnamese, en = English
    hf_token: Optional[str] = None  # per-request Hugging Face token override
class EventTagsResponse(BaseModel):
    """Structured metadata returned by /generate-tags."""
    event_name: str
    generated_tags: List[str]       # searchable tags, lowercased
    primary_category: str
    secondary_categories: List[str]
    keywords: List[str]             # SEO-oriented keywords
    hashtags: List[str]             # each entry prefixed with '#'
    target_audience: List[str]
    sentiment: str                  # positive / neutral / negative
    confidence_score: float         # heuristic 0-1 completeness score
    generation_time: str            # elapsed wall time, e.g. "1.23s"
    model_used: str                 # short model name, or "unknown"
@app.get("/")
async def root():
"""API Information"""
return {
"status": "running",
"service": "Event Tags Generator API",
"version": "1.0.0",
"description": "Generate tags, keywords, categories automatically from event info",
"endpoints": {
"POST /generate-tags": {
"description": "Generate tags from event information",
"request_body": {
"event_name": "string - Tên sự kiện",
"category": "string - Danh mục (âm nhạc, thể thao, công nghệ...)",
"short_description": "string - Mô tả ngắn (1-2 câu)",
"detailed_description": "string - Mô tả chi tiết",
"max_tags": "integer (optional, default: 10) - Số lượng tags tối đa",
"language": "string (optional, default: 'vi') - Ngôn ngữ output",
"hf_token": "string (optional) - Hugging Face token"
},
"response": {
"generated_tags": "array - Danh sách tags",
"primary_category": "string - Danh mục chính",
"secondary_categories": "array - Danh mục phụ",
"keywords": "array - Keywords SEO",
"hashtags": "array - Social media hashtags",
"target_audience": "array - Đối tượng mục tiêu",
"sentiment": "string - Cảm xúc (positive/neutral/negative)",
"confidence_score": "float - Độ tin cậy (0-1)"
},
"example": {
"request": {
"event_name": "Vietnam Music Festival 2025",
"category": "Âm nhạc",
"short_description": "Lễ hội âm nhạc quốc tế lớn nhất Việt Nam",
"detailed_description": "Sự kiện quy tụ các nghệ sĩ nổi tiếng trong nước và quốc tế..."
},
"response": {
"generated_tags": ["âm nhạc", "festival", "concert", "việt nam", "quốc tế"],
"hashtags": ["#VietnamMusicFest", "#MusicFestival2025", "#LiveMusic"]
}
}
}
},
"usage": "POST /generate-tags with event information in JSON body"
}
def build_powerful_prompt(
    event_name: str,
    category: str,
    short_desc: str,
    detailed_desc: str,
    max_tags: int,
    language: str
) -> str:
    """
    Compose the instruction prompt sent to the LLM.

    Embeds the event fields and requests a fixed, line-oriented output format
    (TAGS:, PRIMARY_CATEGORY:, ...) that parse_llm_response reads back.
    """
    # Only "vi" selects Vietnamese output; anything else falls back to English.
    output_language = "in Vietnamese" if language == "vi" else "in English"
    return f"""You are an expert AI system specialized in event marketing, SEO, and content categorization. Your task is to analyze event information and generate comprehensive, relevant tags and metadata.
**EVENT INFORMATION:**
• Event Name: {event_name}
• Primary Category: {category}
• Short Description: {short_desc}
• Detailed Description: {detailed_desc}
**YOUR TASK:**
Analyze the event information above and generate the following {output_language}:
1. **TAGS** ({max_tags} tags maximum):
   - Generate specific, relevant, searchable tags
   - Include event type, theme, activities, location references
   - Mix broad and specific tags for better discoverability
   - Use lowercase, single words or short phrases
   - Example format: âm nhạc, festival, concert, outdoor, hà nội
2. **PRIMARY CATEGORY** (1 category):
   - The main category that best describes this event
   - Choose from: Âm nhạc, Thể thao, Công nghệ, Nghệ thuật, Ẩm thực, Giáo dục, Kinh doanh, Du lịch, Giải trí, Khác
3. **SECONDARY CATEGORIES** (2-3 categories):
   - Additional relevant categories
   - Help with cross-categorization
4. **KEYWORDS** (5-8 keywords):
   - SEO-optimized keywords for search engines
   - Include long-tail keywords
   - Example: "lễ hội âm nhạc hà nội", "concert quốc tế việt nam"
5. **HASHTAGS** (5-7 hashtags):
   - Social media friendly hashtags
   - Mix of popular and unique hashtags
   - Example: #VietnamMusicFest, #LiveMusic, #HanoiEvents
6. **TARGET AUDIENCE** (2-4 audience groups):
   - Who would be interested in this event?
   - Example: Giới trẻ, Gia đình, Dân văn phòng, Sinh viên
7. **SENTIMENT** (one word):
   - Overall emotion/feeling: positive, neutral, or negative
   - Based on event description tone
**OUTPUT FORMAT (JSON-like structure):**
TAGS: tag1, tag2, tag3, ...
PRIMARY_CATEGORY: category_name
SECONDARY_CATEGORIES: cat1, cat2, cat3
KEYWORDS: keyword1, keyword2, keyword3, ...
HASHTAGS: #tag1, #tag2, #tag3, ...
TARGET_AUDIENCE: audience1, audience2, audience3
SENTIMENT: positive/neutral/negative
**IMPORTANT GUIDELINES:**
- Be specific and relevant to the event
- Use terms people would actually search for
- Balance between popular and niche terms
- Consider SEO and social media best practices
- Keep tags concise and meaningful
- Generate output {output_language}
Now, analyze the event and generate the metadata:"""
def parse_llm_response(response_text: str, max_tags: int) -> dict:
    """
    Parse the LLM's line-oriented reply into a structured dict.

    Expects lines of the form ``LABEL: comma, separated, values``. Labels are
    matched case-insensitively, and common markdown decoration that LLMs add
    around labels (``**TAGS:**``, ``- TAGS:``, ``• TAGS:``) is stripped before
    matching, so decorated replies no longer parse as empty.

    Args:
        response_text: raw generated text from the model.
        max_tags: maximum number of entries kept in ``generated_tags``.

    Returns:
        dict with keys generated_tags, primary_category, secondary_categories,
        keywords, hashtags, target_audience, sentiment. Unrecognized or missing
        sections keep their defaults (empty list/string, sentiment "neutral").
    """
    result = {
        "generated_tags": [],
        "primary_category": "",
        "secondary_categories": [],
        "keywords": [],
        "hashtags": [],
        "target_audience": [],
        "sentiment": "neutral"
    }

    def _value_after_colon(line: str) -> str:
        # Everything after the first ':' is the payload.
        return line.split(':', 1)[1].strip()

    def _csv_list(text: str) -> List[str]:
        # Comma-separated values, trimmed, empties dropped.
        return [item.strip() for item in text.split(',') if item.strip()]

    for raw in response_text.strip().split('\n'):
        # Normalize markdown decoration so the label prefix match still fires:
        # drop bold markers entirely, then leading bullet characters.
        line = raw.strip().replace('**', '').lstrip('*-• ').strip()
        if not line:
            continue
        upper = line.upper()
        if upper.startswith('TAGS:'):
            tags = [t.lower() for t in _csv_list(_value_after_colon(line))]
            result["generated_tags"] = tags[:max_tags]
        elif upper.startswith('PRIMARY_CATEGORY:'):
            result["primary_category"] = _value_after_colon(line)
        elif upper.startswith('SECONDARY_CATEGORIES:'):
            result["secondary_categories"] = _csv_list(_value_after_colon(line))
        elif upper.startswith('KEYWORDS:'):
            result["keywords"] = _csv_list(_value_after_colon(line))
        elif upper.startswith('HASHTAGS:'):
            hashtags = _csv_list(_value_after_colon(line))
            # Ensure every hashtag carries the leading '#'.
            result["hashtags"] = [h if h.startswith('#') else f"#{h}" for h in hashtags]
        elif upper.startswith('TARGET_AUDIENCE:'):
            result["target_audience"] = _csv_list(_value_after_colon(line))
        elif upper.startswith('SENTIMENT:'):
            sentiment = _value_after_colon(line).lower()
            # Only accept the three expected values; otherwise keep "neutral".
            if sentiment in ['positive', 'neutral', 'negative']:
                result["sentiment"] = sentiment
    return result
@app.post("/generate-tags", response_model=EventTagsResponse)
async def generate_tags(request: EventTagsRequest):
"""
Generate comprehensive tags and metadata for an event
This endpoint uses advanced LLM prompting to generate:
- Relevant tags for searchability
- Category classification
- SEO keywords
- Social media hashtags
- Target audience identification
- Sentiment analysis
**Input:**
- event_name: Name of the event
- category: Primary category (music, sports, tech, etc.)
- short_description: Brief 1-2 sentence description
- detailed_description: Full event description with details
**Output:**
- Structured metadata ready for use in event management system
- All fields optimized for search and discovery
"""
try:
start_time = datetime.utcnow()
# Get token
token = request.hf_token or hf_token
if not token:
raise HTTPException(
status_code=401,
detail="HUGGINGFACE_TOKEN required. Set environment variable or pass in request body."
)
# Build powerful prompt
prompt = build_powerful_prompt(
event_name=request.event_name,
category=request.category,
short_desc=request.short_description,
detailed_desc=request.detailed_description,
max_tags=request.max_tags,
language=request.language
)
# Initialize HF client
client = InferenceClient(token=token)
# Try multiple models for best results
models_to_try = [
"microsoft/Phi-3-mini-4k-instruct",
"mistralai/Mistral-7B-Instruct-v0.3",
"HuggingFaceH4/zephyr-7b-beta",
"meta-llama/Llama-3.2-3B-Instruct"
]
llm_response = ""
model_used = ""
last_error = None
for model_name in models_to_try:
try:
print(f"Trying model: {model_name}")
# Generate with LLM
llm_response = client.text_generation(
prompt,
model=model_name,
max_new_tokens=800,
temperature=0.7,
top_p=0.9,
do_sample=True,
return_full_text=False
)
if llm_response and len(llm_response.strip()) > 50:
model_used = model_name
print(f"✓ Success with {model_name}")
break
except Exception as model_error:
print(f"✗ Failed with {model_name}: {str(model_error)}")
last_error = model_error
continue
# Check if generation succeeded
if not llm_response or len(llm_response.strip()) < 50:
raise HTTPException(
status_code=500,
detail=f"All models failed. Last error: {str(last_error)}\n\nPlease check:\n1. Token has correct permissions\n2. Token is valid and not expired\n3. Try regenerating token"
)
# Parse LLM response into structured format
parsed_result = parse_llm_response(llm_response, request.max_tags)
# Calculate confidence score (basic heuristic)
confidence = 0.0
if parsed_result["generated_tags"]:
confidence += 0.3
if parsed_result["primary_category"]:
confidence += 0.2
if parsed_result["keywords"]:
confidence += 0.2
if parsed_result["hashtags"]:
confidence += 0.15
if parsed_result["target_audience"]:
confidence += 0.15
end_time = datetime.utcnow()
generation_time = (end_time - start_time).total_seconds()
# Build response
return EventTagsResponse(
event_name=request.event_name,
generated_tags=parsed_result["generated_tags"],
primary_category=parsed_result["primary_category"],
secondary_categories=parsed_result["secondary_categories"],
keywords=parsed_result["keywords"],
hashtags=parsed_result["hashtags"],
target_audience=parsed_result["target_audience"],
sentiment=parsed_result["sentiment"],
confidence_score=round(confidence, 2),
generation_time=f"{generation_time:.2f}s",
model_used=model_used.split('/')[-1] if model_used else "unknown"
)
except HTTPException:
raise
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Error generating tags: {str(e)}"
)
@app.post("/generate-tags/batch")
async def generate_tags_batch(events: List[EventTagsRequest]):
"""
Batch generate tags for multiple events
Useful for bulk processing or migrating existing events
"""
results = []
for event in events:
try:
result = await generate_tags(event)
results.append({
"event_name": event.event_name,
"success": True,
"data": result
})
except Exception as e:
results.append({
"event_name": event.event_name,
"success": False,
"error": str(e)
})
return {
"total": len(events),
"successful": sum(1 for r in results if r["success"]),
"failed": sum(1 for r in results if not r["success"]),
"results": results
}
if __name__ == "__main__":
    # Local/dev entry point. PORT defaults to 7860 (the Hugging Face Spaces
    # convention). `os` is already imported at module top, so the redundant
    # in-block `import os` was removed.
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
        reload=False,
        log_level="info"
    )