Spaces:

minh9972t12
/

autoGenerateTags

Sleeping

App Files Community

minh9972t12 committed on Nov 2, 2025

Commit

90fa95b

verified ·

1 Parent(s): 22304be

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -328

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
-Event Tags Generator - AI Chatbot for automatic tag generation
-Generates relevant tags, keywords, and categories from event information
 """
 from fastapi import FastAPI, HTTPException
@@ -16,9 +16,9 @@ import uvicorn
 # Initialize FastAPI
 app = FastAPI(
-    title="Event Tags Generator API",
-    description="AI-powered automatic tag generation for events using LLM",
-    version="1.0.1"
 )
 # CORS middleware
@@ -39,25 +39,21 @@ else:
 # Pydantic models
-class EventTagsRequest(BaseModel):
     event_name: str
     category: str
     short_description: str
     detailed_description: str
-    max_tags: Optional[int] = 10
-    language: Optional[str] = "vi"  # vi = Vietnamese, en = English
     hf_token: Optional[str] = None
-class EventTagsResponse(BaseModel):
     event_name: str
-    generated_tags: List[str]
-    primary_category: str
-    secondary_categories: List[str]
-    keywords: List[str]
     hashtags: List[str]
     target_audience: List[str]
-    sentiment: str
     confidence_score: float
     generation_time: str
     model_used: str
@@ -68,43 +64,30 @@ async def root():
     """API Information"""
     return {
         "status": "running",
-        "service": "Event Tags Generator API",
-        "version": "1.0.1",
-        "description": "Generate tags, keywords, categories automatically from event info",
         "endpoints": {
-            "POST /generate-tags": {
-                "description": "Generate tags from event information",
                 "request_body": {
                     "event_name": "string - Tên sự kiện",
                     "category": "string - Danh mục (âm nhạc, thể thao, công nghệ...)",
                     "short_description": "string - Mô tả ngắn (1-2 câu)",
                     "detailed_description": "string - Mô tả chi tiết",
-                    "max_tags": "integer (optional, default: 10) - Số lượng tags tối đa",
-                    "language": "string (optional, default: 'vi') - Ngôn ngữ output",
-                    "hf_token": "string (optional) - Hugging Face token"
                 }
             }
-        },
-        "usage": "POST /generate-tags with event information in JSON body"
     }
-def build_powerful_prompt(
-    event_name: str,
-    category: str,
-    short_desc: str,
-    detailed_desc: str,
-    max_tags: int,
-    language: str
-) -> str:
-    """
-    Build a concise, JSON-focused prompt for better parsing
-    """
     lang_instruction = "tiếng Việt" if language == "vi" else "English"
-    # Shorter, more focused prompt that demands JSON output
-    prompt = f"""Phân tích sự kiện và tạo metadata theo format JSON bên dưới.
 SỰ KIỆN:
 Tên: {event_name}
@@ -112,334 +95,126 @@ Danh mục: {category}
 Mô tả ngắn: {short_desc}
 Mô tả chi tiết: {detailed_desc}
-YÊU CẦU: Tạo output dưới dạng JSON với các trường sau (sử dụng {lang_instruction}):
 {{
-  "tags": ["tag1", "tag2", "tag3", ...],
-  "primary_category": "danh mục chính",
-  "secondary_categories": ["danh mục phụ 1", "danh mục phụ 2"],
-  "keywords": ["keyword1", "keyword2", ...],
-  "hashtags": ["#hashtag1", "#hashtag2", ...],
-  "target_audience": ["đối tượng 1", "đối tượng 2"],
-  "sentiment": "positive/neutral/negative"
 }}
-CHÚ Ý:
-- Tạo tối đa {max_tags} tags
-- Tags phải lowercase, ngắn gọn, dễ tìm kiếm
-- Hashtags bắt đầu bằng #
-- Primary_category chọn từ: Âm nhạc, Thể thao, Công nghệ, Nghệ thuật, Ẩm thực, Giáo dục, Kinh doanh, Du lịch, Giải trí
-- Chỉ trả về JSON, không thêm text khác
-JSON OUTPUT:"""
     return prompt
-def parse_llm_response(response_text: str, max_tags: int) -> dict:
-    """
-    Parse LLM response - handles both JSON and text formats
-    """
-    # Default result
-    result = {
-        "generated_tags": [],
-        "primary_category": "",
-        "secondary_categories": [],
-        "keywords": [],
-        "hashtags": [],
-        "target_audience": [],
-        "sentiment": "neutral"
-    }
-    # Debug: Print raw response
-    print(f"\n{'='*60}")
-    print(f"RAW RESPONSE FROM MODEL:")
-    print(f"{'='*60}")
-    print(response_text[:500])  # Print first 500 chars
-    print(f"{'='*60}\n")
-    # Try to extract JSON from response
     try:
-        # Method 1: Try direct JSON parse
-        try:
-            data = json.loads(response_text)
-            if isinstance(data, dict):
-                result["generated_tags"] = data.get("tags", [])[:max_tags]
-                result["primary_category"] = data.get("primary_category", "")
-                result["secondary_categories"] = data.get("secondary_categories", [])
-                result["keywords"] = data.get("keywords", [])
-                result["hashtags"] = data.get("hashtags", [])
-                result["target_audience"] = data.get("target_audience", [])
-                result["sentiment"] = data.get("sentiment", "neutral")
-                print("✓ Parsed using direct JSON")
-                return result
-        except json.JSONDecodeError:
-            pass
-        # Method 2: Extract JSON from text using regex
-        json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', response_text, re.DOTALL)
         if json_match:
-            try:
-                json_str = json_match.group(0)
-                data = json.loads(json_str)
-                result["generated_tags"] = data.get("tags", [])[:max_tags]
-                result["primary_category"] = data.get("primary_category", "")
-                result["secondary_categories"] = data.get("secondary_categories", [])
-                result["keywords"] = data.get("keywords", [])
-                result["hashtags"] = data.get("hashtags", [])
-                result["target_audience"] = data.get("target_audience", [])
-                result["sentiment"] = data.get("sentiment", "neutral")
-                print("✓ Parsed using regex JSON extraction")
-                return result
-            except:
-                pass
-        # Method 3: Parse line by line (fallback)
-        lines = response_text.strip().split('\n')
-        for line in lines:
-            line = line.strip()
-            if not line:
-                continue
-            # Parse TAGS
-            if 'tags' in line.lower() and ':' in line:
-                # Extract array content
-                match = re.search(r'\[(.*?)\]', line)
-                if match:
-                    tags_str = match.group(1)
-                    tags = [t.strip().strip('"\'').lower() for t in tags_str.split(',') if t.strip()]
-                    result["generated_tags"] = tags[:max_tags]
-            # Parse PRIMARY_CATEGORY
-            elif 'primary_category' in line.lower() and ':' in line:
-                value = line.split(':', 1)[1].strip().strip(',"\'')
-                result["primary_category"] = value
-            # Parse SECONDARY_CATEGORIES
-            elif 'secondary_categories' in line.lower() and ':' in line:
-                match = re.search(r'\[(.*?)\]', line)
-                if match:
-                    cats_str = match.group(1)
-                    result["secondary_categories"] = [c.strip().strip('"\'') for c in cats_str.split(',') if c.strip()]
-            # Parse KEYWORDS
-            elif 'keywords' in line.lower() and ':' in line:
-                match = re.search(r'\[(.*?)\]', line)
-                if match:
-                    kw_str = match.group(1)
-                    result["keywords"] = [k.strip().strip('"\'') for k in kw_str.split(',') if k.strip()]
-            # Parse HASHTAGS
-            elif 'hashtags' in line.lower() and ':' in line:
-                match = re.search(r'\[(.*?)\]', line)
-                if match:
-                    ht_str = match.group(1)
-                    hashtags = [h.strip().strip('"\'') for h in ht_str.split(',') if h.strip()]
-                    result["hashtags"] = [h if h.startswith('#') else f"#{h}" for h in hashtags]
-            # Parse TARGET_AUDIENCE
-            elif 'target_audience' in line.lower() and ':' in line:
-                match = re.search(r'\[(.*?)\]', line)
-                if match:
-                    aud_str = match.group(1)
-                    result["target_audience"] = [a.strip().strip('"\'') for a in aud_str.split(',') if a.strip()]
-            # Parse SENTIMENT
-            elif 'sentiment' in line.lower() and ':' in line:
-                sentiment = line.split(':', 1)[1].strip().strip(',"\'').lower()
-                if sentiment in ['positive', 'neutral', 'negative']:
-                    result["sentiment"] = sentiment
-        print("✓ Parsed using line-by-line fallback")
     except Exception as e:
         print(f"✗ Parsing error: {str(e)}")
     return result
-@app.post("/generate-tags", response_model=EventTagsResponse)
-async def generate_tags(request: EventTagsRequest):
-    """
-    Generate comprehensive tags and metadata for an event
-    """
     try:
         start_time = datetime.utcnow()
-        # Get token
         token = request.hf_token or hf_token
         if not token:
-            raise HTTPException(
-                status_code=401,
-                detail="HUGGINGFACE_TOKEN required. Set environment variable or pass in request body."
-            )
-        # Build powerful prompt
-        prompt = build_powerful_prompt(
-            event_name=request.event_name,
-            category=request.category,
-            short_desc=request.short_description,
-            detailed_desc=request.detailed_description,
-            max_tags=request.max_tags,
-            language=request.language
         )
-        # Initialize HF client
         client = InferenceClient(token=token)
-        # Try multiple models for best results
         models_to_try = [
-            "mistralai/Mistral-7B-Instruct-v0.3",
-            "microsoft/Phi-3-mini-4k-instruct",
-            "HuggingFaceH4/zephyr-7b-beta",
-            "meta-llama/Llama-3.2-3B-Instruct",
-            "meta-llama/Meta-Llama-3-8B-Instruct"
         ]
         llm_response = ""
         model_used = ""
-        last_error = None
-        for model_name in models_to_try:
             try:
-                print(f"Trying model: {model_name}")
-                # Format messages
-                messages = [
-                    {
-                        "role": "user",
-                        "content": prompt
-                    }
-                ]
-                # Generate with chat_completion
                 response = client.chat_completion(
-                    messages=messages,
-                    model=model_name,
-                    max_tokens=1000,  # Increased for more content
-                    temperature=0.3,  # Lower temperature for more consistent output
-                    top_p=0.9
                 )
-                # Get response content
                 llm_response = response.choices[0].message.content
-                if llm_response and len(llm_response.strip()) > 20:
-                    model_used = model_name
-                    print(f"✓ Success with {model_name}")
                     break
-            except Exception as model_error:
-                print(f"✗ Failed with {model_name}: {str(model_error)}")
-                last_error = model_error
                 continue
-        # Check if generation succeeded
-        if not llm_response or len(llm_response.strip()) < 20:
-            raise HTTPException(
-                status_code=500,
-                detail=f"All models failed. Last error: {str(last_error)}"
-            )
-        # Parse LLM response
-        parsed_result = parse_llm_response(llm_response, request.max_tags)
-        # If parsing failed, create basic fallback tags
-        if not parsed_result["generated_tags"]:
-            print("⚠ Warning: No tags parsed, creating fallback tags")
-            # Create basic tags from event info
-            fallback_tags = []
-            # Add category as tag
-            if request.category:
-                fallback_tags.append(request.category.lower())
-            # Extract words from event name
-            name_words = [w.lower() for w in request.event_name.split() if len(w) > 3]
-            fallback_tags.extend(name_words[:3])
-            parsed_result["generated_tags"] = fallback_tags[:request.max_tags]
-            parsed_result["primary_category"] = request.category
-            parsed_result["sentiment"] = "positive"
-        # Calculate confidence score
-        confidence = 0.0
-        if parsed_result["generated_tags"]:
-            confidence += 0.3
-        if parsed_result["primary_category"]:
-            confidence += 0.2
-        if parsed_result["keywords"]:
-            confidence += 0.2
-        if parsed_result["hashtags"]:
-            confidence += 0.15
-        if parsed_result["target_audience"]:
-            confidence += 0.15
         end_time = datetime.utcnow()
-        generation_time = (end_time - start_time).total_seconds()
-        # Build response
-        return EventTagsResponse(
             event_name=request.event_name,
-            generated_tags=parsed_result["generated_tags"],
-            primary_category=parsed_result["primary_category"],
-            secondary_categories=parsed_result["secondary_categories"],
-            keywords=parsed_result["keywords"],
-            hashtags=parsed_result["hashtags"],
-            target_audience=parsed_result["target_audience"],
-            sentiment=parsed_result["sentiment"],
             confidence_score=round(confidence, 2),
-            generation_time=f"{generation_time:.2f}s",
-            model_used=model_used.split('/')[-1] if model_used else "unknown"
         )
     except HTTPException:
         raise
     except Exception as e:
-        raise HTTPException(
-            status_code=500,
-            detail=f"Error generating tags: {str(e)}"
-        )
-@app.post("/generate-tags/batch")
-async def generate_tags_batch(events: List[EventTagsRequest]):
-    """
-    Batch generate tags for multiple events
-    """
-    results = []
-    for event in events:
-        try:
-            result = await generate_tags(event)
-            results.append({
-                "event_name": event.event_name,
-                "success": True,
-                "data": result
-            })
-        except Exception as e:
-            results.append({
-                "event_name": event.event_name,
-                "success": False,
-                "error": str(e)
-            })
-    return {
-        "total": len(events),
-        "successful": sum(1 for r in results if r["success"]),
-        "failed": sum(1 for r in results if not r["success"]),
-        "results": results
-    }
 if __name__ == "__main__":
-    import os
     uvicorn.run(
         "app:app",
         host="0.0.0.0",
-        port=int(os.environ.get("PORT", 7860)),
-        reload=False,
-        log_level="info"
-    )

 """
+Event Hashtag Generator - AI Chatbot for automatic hashtag generation
+Generates viral hashtags, keywords, and target audience insights from event data
 """
 from fastapi import FastAPI, HTTPException
 # Initialize FastAPI
 app = FastAPI(
+    title="Event Hashtag Generator API",
+    description="AI-powered automatic hashtag and keyword generation for events",
+    version="2.0.0"
 )
 # CORS middleware
 # Pydantic models
+class EventHashtagRequest(BaseModel):
     event_name: str
     category: str
     short_description: str
     detailed_description: str
+    max_hashtags: Optional[int] = 10
+    language: Optional[str] = "vi"
     hf_token: Optional[str] = None
+class EventHashtagResponse(BaseModel):
     event_name: str
     hashtags: List[str]
+    keywords: List[str]
     target_audience: List[str]
     confidence_score: float
     generation_time: str
     model_used: str
     """API Information"""
     return {
         "status": "running",
+        "service": "Event Hashtag Generator API",
+        "version": "2.0.0",
+        "description": "Generate hashtags, keywords, and target audience from event info",
         "endpoints": {
+            "POST /generate-hashtags": {
+                "description": "Generate viral hashtags for events",
                 "request_body": {
                     "event_name": "string - Tên sự kiện",
                     "category": "string - Danh mục (âm nhạc, thể thao, công nghệ...)",
                     "short_description": "string - Mô tả ngắn (1-2 câu)",
                     "detailed_description": "string - Mô tả chi tiết",
+                    "max_hashtags": "integer (optional, default: 10)",
+                    "language": "string (optional, default: 'vi')",
+                    "hf_token": "string (optional)"
                 }
             }
+        }
     }
+def build_hashtag_prompt(event_name: str, category: str, short_desc: str, detailed_desc: str, max_hashtags: int, language: str) -> str:
+    """Prompt chỉ tập trung vào hashtag, keywords và audience."""
     lang_instruction = "tiếng Việt" if language == "vi" else "English"
+    prompt = f"""Phân tích sự kiện sau và tạo ra các hashtag lan truyền mạnh mẽ, cùng với từ khóa và đối tượng mục tiêu.
 SỰ KIỆN:
 Tên: {event_name}
 Mô tả ngắn: {short_desc}
 Mô tả chi tiết: {detailed_desc}
+YÊU CẦU:
+- Tạo tối đa {max_hashtags} hashtag độc đáo, dễ nhớ, dễ viral, liên quan đến sự kiện.
+- Mỗi hashtag phải bắt đầu bằng #.
+- Ngôn ngữ: {lang_instruction}.
+- Cung cấp thêm:
+  - Danh sách từ khóa (keywords) liên quan đến sự kiện.
+  - Danh sách đối tượng khán giả mục tiêu (target audience) phù hợp.
+- Không trả lời giải thích, chỉ xuất JSON.
+JSON OUTPUT:
 {{
+  "hashtags": ["#TênSựKiện", "#Hashtag2", "#Hashtag3"],
+  "keywords": ["keyword1", "keyword2"],
+  "target_audience": ["đối tượng 1", "đối tượng 2"]
 }}
+CHỈ TRẢ VỀ JSON, KHÔNG THÊM TEXT KHÁC.
+"""
     return prompt
+def parse_llm_response(response_text: str) -> dict:
+    """Parse JSON từ model trả về."""
+    result = {"hashtags": [], "keywords": [], "target_audience": []}
     try:
+        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
         if json_match:
+            data = json.loads(json_match.group(0))
+            result["hashtags"] = data.get("hashtags", [])
+            result["keywords"] = data.get("keywords", [])
+            result["target_audience"] = data.get("target_audience", [])
+            print("✓ Parsed JSON successfully")
+        else:
+            print("⚠ No valid JSON found")
     except Exception as e:
         print(f"✗ Parsing error: {str(e)}")
     return result
+@app.post("/generate-hashtags", response_model=EventHashtagResponse)
+async def generate_hashtags(request: EventHashtagRequest):
+    """Generate viral hashtags, keywords, and target audience for an event."""
     try:
         start_time = datetime.utcnow()
         token = request.hf_token or hf_token
         if not token:
+            raise HTTPException(status_code=401, detail="HUGGINGFACE_TOKEN required.")
+        prompt = build_hashtag_prompt(
+            request.event_name,
+            request.category,
+            request.short_description,
+            request.detailed_description,
+            request.max_hashtags,
+            request.language
         )
         client = InferenceClient(token=token)
         models_to_try = [
+            "KiLM-13b",
+            "Viet-Mistral/Vistral-7B-Chat",
+            "vilm-ai/VinaLLaMA-7B-chat"
         ]
         llm_response = ""
         model_used = ""
+        for model in models_to_try:
             try:
+                print(f"Trying model: {model}")
                 response = client.chat_completion(
+                    model=model,
+                    messages=[{"role": "user", "content": prompt}],
+                    max_tokens=800,
+                    temperature=0.6,
                 )
                 llm_response = response.choices[0].message.content
+                if llm_response and len(llm_response) > 20:
+                    model_used = model
                     break
+            except Exception as e:
+                print(f"✗ Failed with {model}: {e}")
                 continue
+        if not llm_response:
+            raise HTTPException(status_code=500, detail="All models failed to respond.")
+        parsed = parse_llm_response(llm_response)
+        # Fallback nếu model không trả được hashtag
+        if not parsed["hashtags"]:
+            print("⚠ Creating fallback hashtags")
+            base = re.sub(r'[^a-zA-Z0-9 ]', '', request.event_name)
+            words = base.split()
+            parsed["hashtags"] = [f"#{w.capitalize()}" for w in words[:request.max_hashtags]]
+        # Tính confidence đơn giản
+        confidence = 0.3 * bool(parsed["hashtags"]) + 0.3 * bool(parsed["keywords"]) + 0.4 * bool(parsed["target_audience"])
         end_time = datetime.utcnow()
+        return EventHashtagResponse(
             event_name=request.event_name,
+            hashtags=parsed["hashtags"][:request.max_hashtags],
+            keywords=parsed["keywords"],
+            target_audience=parsed["target_audience"],
             confidence_score=round(confidence, 2),
+            generation_time=f"{(end_time - start_time).total_seconds():.2f}s",
+            model_used=model_used.split("/")[-1],
         )
     except HTTPException:
         raise
     except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
 if __name__ == "__main__":
     uvicorn.run(
         "app:app",
         host="0.0.0.0",
+        port=int(os.environ.get("PORT", 7860)),
+        reload=False,
+        log_level="info",
+    )