Spaces:
Sleeping
Sleeping
"""
FastAPI server - YouTube video analysis API.
For deployment on Hugging Face Spaces.
"""
| from fastapi import FastAPI, HTTPException, Query | |
| from fastapi.responses import JSONResponse, HTMLResponse | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel, Field | |
| from typing import Optional, Literal | |
| import os | |
| import tempfile | |
| import json | |
| from pathlib import Path | |
# Local module imports
| from youtube_segmenter import process_youtube_video | |
| from youtube_summarizer import summarize_youtube_segments | |
# Long-form Markdown description passed to FastAPI below. The text is runtime
# data and is kept exactly as it appears in the source.
_API_DESCRIPTION = """
# ์ ํ๋ธ ์์ ๋ถ์ API
์ด API๋ ์ ํ๋ธ ์์์ ์๋ง์ ์ฃผ์ ๋ณ๋ก ๋ถ๋ฆฌํ๊ณ AI๋ก ์์ฝํ๋ ์๋น์ค์ ๋๋ค.
## ์ฃผ์ ๊ธฐ๋ฅ
### 1. ์ธ๊ทธ๋จผํธ ๋ถ๋ฆฌ
- **ruptures PELT ์๊ณ ๋ฆฌ์ฆ**: ํต๊ณ์ ๋ณํ์ ํ์ง๋ก ๋์ฃผ์ ๋ถ๋ฆฌ
- **์๋ฏธ ์ ์ฌ๋ ๋ถ์**: Sentence Transformers๋ก ์์ฃผ์ ์ ๋ฐ ๋ถํ
- **ํ์์คํฌํ ๋งคํ**: ๊ฐ ์ธ๊ทธ๋จผํธ์ ์์ ์์ ์๊ฐ ์ ๊ณต
### 2. AI ์์ฝ
- **Gemini 2.0 Flash Lite**: Google์ ์ต์ ๊ฒฝ๋ AI ๋ชจ๋ธ ์ฌ์ฉ
- **์ฃผ์ ๋ผ๋ฒจ๋ง**: 2-5๋จ์ด๋ก ํต์ฌ ์ฃผ์ ์ถ์ถ
- **์ค๋งํธ ํํฐ๋ง**: ์๋ฏธ ์๋ ์งง์ ๋ฆฌ์ก์ ์๋ ์ ์ธ
## ๊ธฐ์ ์คํ
- **youtube-transcript-api**: ์๋ง ์ถ์ถ
- **ruptures**: ํต๊ณ์ ๋ณํ์ ํ์ง
- **sentence-transformers**: ๋ฌธ์ฅ ์๋ฒ ๋ฉ
- **google-generativeai**: AI ์์ฝ
- **FastAPI**: REST API ์๋ฒ
## ํ๋ผ๋ฏธํฐ ๊ฐ์ด๋
### Penalty (๋์ฃผ์ ๋ถ๋ฆฌ)
- **1.0-3.0**: ๋งค์ฐ ์ธ๋ฐํ ๋ถ๋ฆฌ (๋ง์ ์ฃผ์ )
- **5.0**: ๊ท ํ์กํ ๋ถ๋ฆฌ (๊ธฐ๋ณธ๊ฐ)
- **7.0-10.0**: ํฐ ์ฃผ์ ๋จ์ ๋ถ๋ฆฌ
- **15.0-30.0**: ๋งค์ฐ ํฐ ๋ฉ์ด๋ฆฌ๋ก ๋ถ๋ฆฌ
### Threshold (์์ฃผ์ ๋ถ๋ฆฌ)
- **70-80**: ์ธ๋ฐํ ๋ถ๋ฆฌ
- **85-92**: ๊ท ํ์กํ ๋ถ๋ฆฌ (๊ถ์ฅ)
- **90**: ๊ธฐ๋ณธ๊ฐ
- **95-98**: ํฐ ๋ฉ์ด๋ฆฌ ๋ถ๋ฆฌ
## ์ฌ์ฉ ์์
### GET ์์ฒญ
```
/process?youtube_url=https://youtube.com/watch?v=VIDEO_ID&mode=summary&penalty=5.0&threshold=90&gemini_api_key=YOUR_KEY&youtube_api_key=YOUR_KEY
```
### POST ์์ฒญ
```json
{
"youtube_url": "https://youtube.com/watch?v=VIDEO_ID",
"mode": "summary",
"penalty": 5.0,
"threshold": 90,
"gemini_api_key": "your_api_key_here",
"youtube_api_key": "your_youtube_api_key_here"
}
```
"""

# Create the FastAPI app; the middleware below and the uvicorn entry point
# at the bottom of the file both use this object.
app = FastAPI(
    title="YouTube Video Analyzer API",
    description=_API_DESCRIPTION,
    version="1.0.0",
    contact=dict(
        name="YouTube Analyzer",
        url="https://github.com/yourusername/youtube-analyzer",
    ),
)
# CORS settings.
# Any origin may call the API. Callers authenticate with API keys carried in
# the query string or JSON body, not with cookies, so credentialed CORS is
# not needed. It is switched off because a wildcard origin must not be
# combined with credentials: with both enabled, any website could issue
# credentialed cross-origin requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Request model.
# Parsed from the JSON body by the POST handler and built from query
# parameters by the GET handler. Documented with comments rather than a
# class docstring so the generated schema is left unchanged.
class ProcessRequest(BaseModel):
    # Required: exported as YOUTUBE_API_KEY before processing starts.
    youtube_api_key: str = Field(
        ...,
        description="YouTube API ํค (ํ์)",
        example="YOUR_YOUTUBE_API_KEY",
    )
    # Required: the video to process.
    youtube_url: str = Field(
        ...,
        description="์ ํ๋ธ URL",
        example="https://www.youtube.com/watch?v=tLwZfLMHjKo",
    )
    # "segment" returns the split only; "summary" also runs summarization.
    mode: Literal["segment", "summary"] = Field(
        default="summary",
        description="์ฒ๋ฆฌ ๋ชจ๋: 'segment' (๋ถ๋ฆฌ๋ง) ๋๋ 'summary' (๋ถ๋ฆฌ+์์ฝ)",
    )
    # Forwarded to the segmenter as `penalty`; validated to 1.0 <= x <= 30.0.
    penalty: float = Field(
        default=5.0,
        ge=1.0,
        le=30.0,
        description=(
            "๋์ฃผ์ ๋ถ๋ฆฌ ๋ฏผ๊ฐ๋ (1.0-30.0)\n"
            "- **์์ ๊ฐ (1.0~3.0)**: ๋ณํ๋ฅผ ๋งค์ฐ ๋ฏผ๊ฐํ๊ฒ ๊ฐ์งํ์ฌ ์ฃผ์ ๋ฅผ ์๊ฒ ์ชผ๊ฐญ๋๋ค.\n"
            "- **๊ธฐ๋ณธ ๊ฐ (5.0)**: ๊ถ์ฅํ๋ ๊ธฐ๋ณธ ์ค์ ์ ๋๋ค.\n"
            "- **ํฐ ๊ฐ (10.0+)**: ์ ์ฒด์ ์ธ ํฐ ํ๋ฆ ์์ฃผ๋ก ๊ตต์งํ๊ฒ ์ฃผ์ ๋ฅผ ๋๋๋๋ค."
        ),
    )
    # Forwarded to the segmenter as `threshold`; validated to 50 <= x <= 99.
    threshold: int = Field(
        default=90,
        ge=50,
        le=99,
        description=(
            "์์ฃผ์ ๋ถ๋ฆฌ ๋ฏผ๊ฐ๋ (50-99)\n"
            "- **๋ฎ์ ๊ฐ (70-80)**: ๋ฌธ์ฅ ๊ฐ์ ๋ฏธ์ธํ ์ฐจ์ด๋ ๊ฐ์งํ์ฌ ์ธ๋ฐํ๊ฒ ๋๋๋๋ค.\n"
            "- **๊ธฐ๋ณธ ๊ฐ (90)**: ๊ถ์ฅํ๋ ๊ธฐ๋ณธ ์ค์ ์ ๋๋ค.\n"
            "- **๋์ ๊ฐ (95+)**: ์์ฃผ ํ์คํ ์ฃผ์ ๋ณํ๊ฐ ์์ ๋๋ง ๋๋๋๋ค."
        ),
    )
    # Optional here; summary mode rejects the request when neither this
    # field nor the GEMINI_KEY environment variable supplies a key.
    gemini_api_key: Optional[str] = Field(
        default=None,
        description="Gemini API ํค (์์ฝ ๋ชจ๋์์ ํ์)",
    )

    class Config:
        # Sample payload attached to the model's schema.
        schema_extra = {
            "example": dict(
                youtube_url="https://www.youtube.com/watch?v=tLwZfLMHjKo",
                mode="summary",
                penalty=5.0,
                threshold=90,
                gemini_api_key="your_gemini_api_key_here",
                youtube_api_key="your_youtube_api_key_here",
            )
        }
# Response models.
# One segment entry of a "segment"-mode result: four string fields.
# NOTE(review): not referenced by any handler visible in this file - confirm
# whether it is meant to be wired in as a response_model.
class SegmentItem(BaseModel):
    url: str
    chunk_id: str
    chunk_time: str
    text: str
# One segment entry of a "summary"-mode result: four string fields plus a
# nullable summary and topic.
# NOTE(review): not referenced by any handler visible in this file - confirm
# whether it is meant to be wired in as a response_model.
class SummaryItem(BaseModel):
    url: str
    chunk_id: str
    chunk_time: str
    text: str
    summary: Optional[str]
    topic: Optional[str]
# Registered explicitly: without the decorator the handler is never routed,
# and without response_class=HTMLResponse the markup would be returned as a
# JSON-encoded string instead of a page.
@app.get("/", response_class=HTMLResponse)
async def root():
    """API home page - redirect to the Swagger UI.

    Returns:
        A small HTML document whose meta-refresh sends the browser to
        ``/docs``; it also carries a plain link to the same page.
    """
    return """
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="refresh" content="0; url=/docs">
<title>Redirecting...</title>
</head>
<body>
<p>Redirecting to <a href="/docs">API Documentation</a>...</p>
</body>
</html>
"""
# Registered under the path the /info payload advertises ("GET /health");
# without the decorator the handler is never routed.
@app.get("/health")
async def health_check():
    """Health-check endpoint.

    Returns:
        A fixed dict with ``status`` set to ``"healthy"`` and a short
        ``message``.
    """
    return {"status": "healthy", "message": "API is running"}
# Registered under the path the /info payload advertises ("GET /process");
# without the decorator the handler is never routed.
@app.get("/process")
async def process_get(
    youtube_api_key: str = Query(..., description="YouTube API ํค (ํ์)"),
    youtube_url: str = Query(..., description="์ ํ๋ธ URL", example="https://www.youtube.com/watch?v=tLwZfLMHjKo"),
    mode: Literal["segment", "summary"] = Query(
        default="summary",
        description="์ฒ๋ฆฌ ๋ชจ๋: 'segment' (๋ถ๋ฆฌ๋ง) ๋๋ 'summary' (๋ถ๋ฆฌ+์์ฝ)"
    ),
    penalty: float = Query(
        default=5.0,
        ge=1.0,
        le=30.0,
        description="๋์ฃผ์ ๋ถ๋ฆฌ ๋ฏผ๊ฐ๋ (1.0-30.0)\n- **์์ ๊ฐ (1.0~3.0)**: ๋ณํ๋ฅผ ๋งค์ฐ ๋ฏผ๊ฐํ๊ฒ ๊ฐ์งํ์ฌ ์ฃผ์ ๋ฅผ ์๊ฒ ์ชผ๊ฐญ๋๋ค.\n- **๊ธฐ๋ณธ ๊ฐ (5.0)**: ๊ถ์ฅํ๋ ๊ธฐ๋ณธ ์ค์ ์ ๋๋ค.\n- **ํฐ ๊ฐ (10.0+)**: ์ ์ฒด์ ์ธ ํฐ ํ๋ฆ ์์ฃผ๋ก ๊ตต์งํ๊ฒ ์ฃผ์ ๋ฅผ ๋๋๋๋ค."
    ),
    threshold: int = Query(
        default=90,
        ge=50,
        le=99,
        description="์์ฃผ์ ๋ถ๋ฆฌ ๋ฏผ๊ฐ๋ (50-99)\n- **๋ฎ์ ๊ฐ (70-80)**: ๋ฌธ์ฅ ๊ฐ์ ๋ฏธ์ธํ ์ฐจ์ด๋ ๊ฐ์งํ์ฌ ์ธ๋ฐํ๊ฒ ๋๋๋๋ค.\n- **๊ธฐ๋ณธ ๊ฐ (90)**: ๊ถ์ฅํ๋ ๊ธฐ๋ณธ ์ค์ ์ ๋๋ค.\n- **๋์ ๊ฐ (95+)**: ์์ฃผ ํ์คํ ์ฃผ์ ๋ณํ๊ฐ ์์ ๋๋ง ๋๋๋๋ค."
    ),
    gemini_api_key: Optional[str] = Query(
        default=None,
        description="Gemini API ํค (์์ฝ ๋ชจ๋์์ ํ์)"
    )
):
    """
    Process a YouTube video using query parameters (GET).

    - **youtube_api_key**: YouTube API key (required)
    - **youtube_url**: YouTube URL (required)
    - **mode**: segment (split only) or summary (split + summarize)
    - **penalty**: major-topic split sensitivity (default: 5.0)
    - **threshold**: sub-topic split sensitivity (default: 90)
    - **gemini_api_key**: Gemini API key (required in summary mode)

    Returns whatever `process_video` returns for the assembled request.
    """
    # Repackage the query parameters into the same model the POST handler
    # receives, so both entry points share one processing path.
    request = ProcessRequest(
        youtube_api_key=youtube_api_key,
        youtube_url=youtube_url,
        mode=mode,
        penalty=penalty,
        threshold=threshold,
        gemini_api_key=gemini_api_key
    )
    return await process_video(request)
# Registered under the path the /info payload advertises ("POST /process");
# without the decorator the handler is never routed.
@app.post("/process")
async def process_post(request: ProcessRequest):
    """
    Process a YouTube video using a JSON request body (POST).

    Request Body:
    ```json
    {
        "youtube_api_key": "your_youtube_api_key",
        "youtube_url": "https://youtube.com/watch?v=VIDEO_ID",
        "mode": "summary",
        "penalty": 5.0,
        "threshold": 90,
        "gemini_api_key": "your_gemini_api_key"
    }
    ```

    Response:
    - when mode is "segment": the array of segments
    - when mode is "summary": the array of segments including summaries
    """
    return await process_video(request)
async def process_video(request: ProcessRequest):
    """Run segmentation, and optionally summarization, for one request.

    Shared implementation behind the GET and POST handlers.

    Args:
        request: The validated request parameters.

    Returns:
        A JSONResponse whose body holds ``status``, ``mode``,
        ``youtube_url``, ``total_segments`` and ``segments``.

    Raises:
        HTTPException: 400 when summary mode is requested and no Gemini key
            is available from the request or the ``GEMINI_KEY`` environment
            variable; 500 for any other failure, with the error text as the
            detail.
    """
    try:
        # The YouTube API key is mandatory and is exported via the process
        # environment (presumably read by the segmenter - confirm).
        # NOTE(review): os.environ is process-wide, so concurrent requests
        # carrying different keys can overwrite each other's value.
        os.environ['YOUTUBE_API_KEY'] = request.youtube_api_key

        # Summary mode needs a Gemini key, from the request or from an
        # already-set environment variable.
        if request.mode == "summary":
            if request.gemini_api_key:
                os.environ['GEMINI_KEY'] = request.gemini_api_key
            elif not os.getenv('GEMINI_KEY'):
                raise HTTPException(
                    status_code=400,
                    detail="์์ฝ ๋ชจ๋์์๋ gemini_api_key๊ฐ ํ์ํฉ๋๋ค. (ํ๊ฒฝ๋ณ์ ๋๋ ํ๋ผ๋ฏธํฐ๋ก ์ ๊ณต)"
                )

        # Intermediate JSON files live in a scratch directory that is removed
        # when the block exits; their contents are loaded into memory first.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Step 1: split the video into segments.
            segment_file = process_youtube_video(
                request.youtube_url,
                penalty=request.penalty,
                threshold=request.threshold,
                output_dir=temp_dir
            )

            with open(segment_file, 'r', encoding='utf-8') as f:
                segment_data = json.load(f)

            # Segment-only mode stops here.
            if request.mode == "segment":
                return JSONResponse(content={
                    "status": "success",
                    "mode": "segment",
                    "youtube_url": request.youtube_url,
                    "total_segments": len(segment_data),
                    "segments": segment_data
                })

            # Step 2: summarize the segments.
            summary_file = summarize_youtube_segments(
                segment_file,
                output_json_path=os.path.join(temp_dir, "summary.json"),
                gemini_api_key=request.gemini_api_key
            )

            with open(summary_file, 'r', encoding='utf-8') as f:
                summary_data = json.load(f)

            return JSONResponse(content={
                "status": "success",
                "mode": "summary",
                "youtube_url": request.youtube_url,
                "total_segments": len(summary_data),
                "segments": summary_data
            })
    except HTTPException:
        # Deliberate HTTP errors (such as the 400 above) keep their own
        # status code instead of being rewrapped as a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# Registered under the path this payload itself advertises ("GET /info");
# without the decorator the handler is never routed.
@app.get("/info")
async def api_info():
    """
    Return API information.

    Provides the project's purpose, feature list, tech stack and the
    available endpoints as a static dict.
    """
    return {
        "project_name": "YouTube Video Analyzer",
        "version": "1.0.0",
        "description": "์ ํ๋ธ ์์์ ์๋ง์ ์ฃผ์ ๋ณ๋ก ๋ถ๋ฆฌํ๊ณ AI๋ก ์์ฝํ๋ ์๋น์ค",
        "purpose": {
            "ko": "๊ธด ์ ํ๋ธ ์์์ ๋ด์ฉ์ ๋น ๋ฅด๊ฒ ํ์ ํ๊ณ , ์ํ๋ ๋ถ๋ถ์ผ๋ก ์ด๋ํ ์ ์๋๋ก ๋์ต๋๋ค.",
            "en": "Helps quickly understand long YouTube videos and navigate to desired sections."
        },
        "features": [
            "ํต๊ณ์ ๋ณํ์ ํ์ง (ruptures PELT)",
            "์๋ฏธ ๊ธฐ๋ฐ ์ธ๊ทธ๋จผํธ ๋ถํ (Sentence Transformers)",
            "ํ์์คํฌํ ๋งคํ",
            "AI ์์ฝ ๋ฐ ์ฃผ์ ๋ผ๋ฒจ๋ง (Gemini 2.0 Flash Lite)"
        ],
        "tech_stack": {
            "backend": "FastAPI",
            "ai_models": ["sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "gemini-2.0-flash-lite"],
            "algorithms": ["ruptures PELT", "Semantic Chunking"],
            "deployment": "Hugging Face Spaces"
        },
        "endpoints": {
            "GET /process": "์ ํ๋ธ ์์ ์ฒ๋ฆฌ (์ฟผ๋ฆฌ ํ๋ผ๋ฏธํฐ ์ฌ์ฉ)",
            "POST /process": "์ ํ๋ธ ์์ ์ฒ๋ฆฌ (JSON body ์ฌ์ฉ)",
            "GET /health": "ํฌ์ค ์ฒดํฌ",
            "GET /info": "API ์ ๋ณด"
        }
    }
if __name__ == "__main__":
    import uvicorn

    # Listen on all interfaces, port 7860, with the keep-alive timeout raised
    # to 300 seconds (5 minutes). Note that timeout_keep_alive governs idle
    # keep-alive connections, not how long a single request may run.
    server_options = {
        "host": "0.0.0.0",
        "port": 7860,
        "timeout_keep_alive": 300,
    }
    uvicorn.run(app, **server_options)