# youtube_segment_summary / api_server.py
# jonghhhh's picture
# Upload api_server.py with huggingface_hub
# 8ebd22f verified
"""
FastAPI ์„œ๋ฒ„ - ์œ ํŠœ๋ธŒ ์˜์ƒ ๋ถ„์„ API
ํ—ˆ๊น…ํŽ˜์ด์Šค ์ŠคํŽ˜์ด์Šค ๋ฐฐํฌ์šฉ
"""
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import JSONResponse, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from typing import Optional, Literal
import os
import tempfile
import json
from pathlib import Path
# Local module imports
from youtube_segmenter import process_youtube_video
from youtube_summarizer import summarize_youtube_segments
# Create the FastAPI app
# Application object. The keyword arguments below are API metadata: a title,
# a long Markdown-formatted description (feature list, tech stack, parameter
# guide for `penalty`/`threshold`, and example GET/POST requests), a version
# string and contact details.
app = FastAPI(
    title="YouTube Video Analyzer API",
    # The description is a runtime string; its lines are kept flush-left so the
    # Markdown headings and code fences inside it are not indented.
    description="""
# ์œ ํŠœ๋ธŒ ์˜์ƒ ๋ถ„์„ API
์ด API๋Š” ์œ ํŠœ๋ธŒ ์˜์ƒ์˜ ์ž๋ง‰์„ ์ฃผ์ œ๋ณ„๋กœ ๋ถ„๋ฆฌํ•˜๊ณ  AI๋กœ ์š”์•ฝํ•˜๋Š” ์„œ๋น„์Šค์ž…๋‹ˆ๋‹ค.
## ์ฃผ์š” ๊ธฐ๋Šฅ
### 1. ์„ธ๊ทธ๋จผํŠธ ๋ถ„๋ฆฌ
- **ruptures PELT ์•Œ๊ณ ๋ฆฌ์ฆ˜**: ํ†ต๊ณ„์  ๋ณ€ํ™”์  ํƒ์ง€๋กœ ๋Œ€์ฃผ์ œ ๋ถ„๋ฆฌ
- **์˜๋ฏธ ์œ ์‚ฌ๋„ ๋ถ„์„**: Sentence Transformers๋กœ ์†Œ์ฃผ์ œ ์ •๋ฐ€ ๋ถ„ํ• 
- **ํƒ€์ž„์Šคํƒฌํ”„ ๋งคํ•‘**: ๊ฐ ์„ธ๊ทธ๋จผํŠธ์˜ ์˜์ƒ ์‹œ์ž‘ ์‹œ๊ฐ„ ์ œ๊ณต
### 2. AI ์š”์•ฝ
- **Gemini 2.0 Flash Lite**: Google์˜ ์ตœ์‹  ๊ฒฝ๋Ÿ‰ AI ๋ชจ๋ธ ์‚ฌ์šฉ
- **์ฃผ์ œ ๋ผ๋ฒจ๋ง**: 2-5๋‹จ์–ด๋กœ ํ•ต์‹ฌ ์ฃผ์ œ ์ถ”์ถœ
- **์Šค๋งˆํŠธ ํ•„ํ„ฐ๋ง**: ์˜๋ฏธ ์—†๋Š” ์งง์€ ๋ฆฌ์•ก์…˜ ์ž๋™ ์ œ์™ธ
## ๊ธฐ์ˆ  ์Šคํƒ
- **youtube-transcript-api**: ์ž๋ง‰ ์ถ”์ถœ
- **ruptures**: ํ†ต๊ณ„์  ๋ณ€ํ™”์  ํƒ์ง€
- **sentence-transformers**: ๋ฌธ์žฅ ์ž„๋ฒ ๋”ฉ
- **google-generativeai**: AI ์š”์•ฝ
- **FastAPI**: REST API ์„œ๋ฒ„
## ํŒŒ๋ผ๋ฏธํ„ฐ ๊ฐ€์ด๋“œ
### Penalty (๋Œ€์ฃผ์ œ ๋ถ„๋ฆฌ)
- **1.0-3.0**: ๋งค์šฐ ์„ธ๋ฐ€ํ•œ ๋ถ„๋ฆฌ (๋งŽ์€ ์ฃผ์ œ)
- **5.0**: ๊ท ํ˜•์žกํžŒ ๋ถ„๋ฆฌ (๊ธฐ๋ณธ๊ฐ’)
- **7.0-10.0**: ํฐ ์ฃผ์ œ ๋‹จ์œ„ ๋ถ„๋ฆฌ
- **15.0-30.0**: ๋งค์šฐ ํฐ ๋ฉ์–ด๋ฆฌ๋กœ ๋ถ„๋ฆฌ
### Threshold (์†Œ์ฃผ์ œ ๋ถ„๋ฆฌ)
- **70-80**: ์„ธ๋ฐ€ํ•œ ๋ถ„๋ฆฌ
- **85-92**: ๊ท ํ˜•์žกํžŒ ๋ถ„๋ฆฌ (๊ถŒ์žฅ)
- **90**: ๊ธฐ๋ณธ๊ฐ’
- **95-98**: ํฐ ๋ฉ์–ด๋ฆฌ ๋ถ„๋ฆฌ
## ์‚ฌ์šฉ ์˜ˆ์‹œ
### GET ์š”์ฒญ
```
/process?youtube_url=https://youtube.com/watch?v=VIDEO_ID&mode=summary&penalty=5.0&threshold=90&gemini_api_key=YOUR_KEY&youtube_api_key=YOUR_KEY
```
### POST ์š”์ฒญ
```json
{
"youtube_url": "https://youtube.com/watch?v=VIDEO_ID",
"mode": "summary",
"penalty": 5.0,
"threshold": 90,
"gemini_api_key": "your_api_key_here",
"youtube_api_key": "your_youtube_api_key_here"
}
```
""",
    version="1.0.0",
    # NOTE(review): the URL contains "yourusername" and looks like a
    # placeholder -- confirm the real repository link.
    contact={
        "name": "YouTube Analyzer",
        "url": "https://github.com/yourusername/youtube-analyzer"
    }
)
# CORS configuration
# Allow cross-origin calls from any site, with any method and any header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    # Credentialed CORS is switched off: every endpoint in this file takes its
    # API keys from the query string or the JSON body, never from cookies, and
    # combining credentials with wildcard origins would let any website issue
    # credentialed requests against this server.
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Request model
class ProcessRequest(BaseModel):
    # Validated parameter set for the /process endpoints: the POST handler
    # receives it as the JSON body and the GET handler builds one from its
    # query parameters. (Kept as `#` comments rather than a class docstring so
    # the generated schema description is not altered.)

    # Required. process_video exports this value as the YOUTUBE_API_KEY
    # environment variable before calling the segmenter.
    youtube_api_key: str = Field(..., description="YouTube API ํ‚ค (ํ•„์ˆ˜)", example="YOUR_YOUTUBE_API_KEY")
    # Required. URL of the video to analyze; passed to process_youtube_video.
    youtube_url: str = Field(..., description="์œ ํŠœ๋ธŒ URL", example="https://www.youtube.com/watch?v=tLwZfLMHjKo")
    # "segment" returns the segmentation only; "summary" (default) also runs
    # the summarizer on the segments.
    mode: Literal["segment", "summary"] = Field(
        default="summary",
        description="์ฒ˜๋ฆฌ ๋ชจ๋“œ: 'segment' (๋ถ„๋ฆฌ๋งŒ) ๋˜๋Š” 'summary' (๋ถ„๋ฆฌ+์š”์•ฝ)"
    )
    # Float in [1.0, 30.0], default 5.0; forwarded as `penalty` to
    # process_youtube_video.
    penalty: float = Field(
        default=5.0,
        ge=1.0,
        le=30.0,
        description="๋Œ€์ฃผ์ œ ๋ถ„๋ฆฌ ๋ฏผ๊ฐ๋„ (1.0-30.0)\n- **์ž‘์€ ๊ฐ’ (1.0~3.0)**: ๋ณ€ํ™”๋ฅผ ๋งค์šฐ ๋ฏผ๊ฐํ•˜๊ฒŒ ๊ฐ์ง€ํ•˜์—ฌ ์ฃผ์ œ๋ฅผ ์ž˜๊ฒŒ ์ชผ๊ฐญ๋‹ˆ๋‹ค.\n- **๊ธฐ๋ณธ ๊ฐ’ (5.0)**: ๊ถŒ์žฅํ•˜๋Š” ๊ธฐ๋ณธ ์„ค์ •์ž…๋‹ˆ๋‹ค.\n- **ํฐ ๊ฐ’ (10.0+)**: ์ „์ฒด์ ์ธ ํฐ ํ๋ฆ„ ์œ„์ฃผ๋กœ ๊ตต์งํ•˜๊ฒŒ ์ฃผ์ œ๋ฅผ ๋‚˜๋ˆ•๋‹ˆ๋‹ค."
    )
    # Integer in [50, 99], default 90; forwarded as `threshold` to
    # process_youtube_video.
    threshold: int = Field(
        default=90,
        ge=50,
        le=99,
        description="์†Œ์ฃผ์ œ ๋ถ„๋ฆฌ ๋ฏผ๊ฐ๋„ (50-99)\n- **๋‚ฎ์€ ๊ฐ’ (70-80)**: ๋ฌธ์žฅ ๊ฐ„์˜ ๋ฏธ์„ธํ•œ ์ฐจ์ด๋„ ๊ฐ์ง€ํ•˜์—ฌ ์„ธ๋ฐ€ํ•˜๊ฒŒ ๋‚˜๋ˆ•๋‹ˆ๋‹ค.\n- **๊ธฐ๋ณธ ๊ฐ’ (90)**: ๊ถŒ์žฅํ•˜๋Š” ๊ธฐ๋ณธ ์„ค์ •์ž…๋‹ˆ๋‹ค.\n- **๋†’์€ ๊ฐ’ (95+)**: ์•„์ฃผ ํ™•์‹คํ•œ ์ฃผ์ œ ๋ณ€ํ™”๊ฐ€ ์žˆ์„ ๋•Œ๋งŒ ๋‚˜๋ˆ•๋‹ˆ๋‹ค."
    )
    # Optional. In summary mode process_video rejects the request with a 400
    # when this is empty and the GEMINI_KEY environment variable is unset.
    gemini_api_key: Optional[str] = Field(
        default=None,
        description="Gemini API ํ‚ค (์š”์•ฝ ๋ชจ๋“œ์—์„œ ํ•„์ˆ˜)"
    )

    class Config:
        # Example payload attached to the model's schema.
        # NOTE(review): `schema_extra` is the pydantic v1 key (v2 calls it
        # `json_schema_extra`) -- confirm against the installed version.
        schema_extra = {
            "example": {
                "youtube_url": "https://www.youtube.com/watch?v=tLwZfLMHjKo",
                "mode": "summary",
                "penalty": 5.0,
                "threshold": 90,
                "gemini_api_key": "your_gemini_api_key_here",
                "youtube_api_key": "your_youtube_api_key_here"
            }
        }
# Response models
class SegmentItem(BaseModel):
    # Shape of one segment entry: four required string fields.
    # NOTE(review): no endpoint in this file references this model (no
    # response_model is set); presumably it mirrors the JSON items written by
    # the segmenter -- confirm against youtube_segmenter.
    url: str
    chunk_id: str
    chunk_time: str
    text: str
class SummaryItem(BaseModel):
    # Shape of one summarized segment entry: the same four string fields as
    # SegmentItem plus `summary` and `topic`, both of which may be None.
    # NOTE(review): no endpoint in this file references this model; presumably
    # it mirrors the JSON items written by the summarizer -- confirm against
    # youtube_summarizer.
    url: str
    chunk_id: str
    chunk_time: str
    text: str
    summary: Optional[str]
    topic: Optional[str]
@app.get("/", response_class=HTMLResponse, include_in_schema=False)
async def root():
    """Serve the landing page, which redirects the browser to the docs at /docs."""
    # Zero-delay meta refresh; the anchor is a fallback link to the same page.
    # The markup is kept flush-left so the returned text is unchanged.
    redirect_page = """
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="refresh" content="0; url=/docs">
<title>Redirecting...</title>
</head>
<body>
<p>Redirecting to <a href="/docs">API Documentation</a>...</p>
</body>
</html>
"""
    return redirect_page
@app.get("/health")
async def health_check():
    """Health check endpoint: always reports a fixed "healthy" payload."""
    liveness = {"status": "healthy", "message": "API is running"}
    return liveness
@app.get("/process", tags=["Processing"])
async def process_get(
    youtube_api_key: str = Query(..., description="YouTube API ํ‚ค (ํ•„์ˆ˜)"),
    youtube_url: str = Query(..., description="์œ ํŠœ๋ธŒ URL", example="https://www.youtube.com/watch?v=tLwZfLMHjKo"),
    mode: Literal["segment", "summary"] = Query(
        default="summary",
        description="์ฒ˜๋ฆฌ ๋ชจ๋“œ: 'segment' (๋ถ„๋ฆฌ๋งŒ) ๋˜๋Š” 'summary' (๋ถ„๋ฆฌ+์š”์•ฝ)"
    ),
    penalty: float = Query(
        default=5.0,
        ge=1.0,
        le=30.0,
        description="๋Œ€์ฃผ์ œ ๋ถ„๋ฆฌ ๋ฏผ๊ฐ๋„ (1.0-30.0)\n- **์ž‘์€ ๊ฐ’ (1.0~3.0)**: ๋ณ€ํ™”๋ฅผ ๋งค์šฐ ๋ฏผ๊ฐํ•˜๊ฒŒ ๊ฐ์ง€ํ•˜์—ฌ ์ฃผ์ œ๋ฅผ ์ž˜๊ฒŒ ์ชผ๊ฐญ๋‹ˆ๋‹ค.\n- **๊ธฐ๋ณธ ๊ฐ’ (5.0)**: ๊ถŒ์žฅํ•˜๋Š” ๊ธฐ๋ณธ ์„ค์ •์ž…๋‹ˆ๋‹ค.\n- **ํฐ ๊ฐ’ (10.0+)**: ์ „์ฒด์ ์ธ ํฐ ํ๋ฆ„ ์œ„์ฃผ๋กœ ๊ตต์งํ•˜๊ฒŒ ์ฃผ์ œ๋ฅผ ๋‚˜๋ˆ•๋‹ˆ๋‹ค."
    ),
    threshold: int = Query(
        default=90,
        ge=50,
        le=99,
        description="์†Œ์ฃผ์ œ ๋ถ„๋ฆฌ ๋ฏผ๊ฐ๋„ (50-99)\n- **๋‚ฎ์€ ๊ฐ’ (70-80)**: ๋ฌธ์žฅ ๊ฐ„์˜ ๋ฏธ์„ธํ•œ ์ฐจ์ด๋„ ๊ฐ์ง€ํ•˜์—ฌ ์„ธ๋ฐ€ํ•˜๊ฒŒ ๋‚˜๋ˆ•๋‹ˆ๋‹ค.\n- **๊ธฐ๋ณธ ๊ฐ’ (90)**: ๊ถŒ์žฅํ•˜๋Š” ๊ธฐ๋ณธ ์„ค์ •์ž…๋‹ˆ๋‹ค.\n- **๋†’์€ ๊ฐ’ (95+)**: ์•„์ฃผ ํ™•์‹คํ•œ ์ฃผ์ œ ๋ณ€ํ™”๊ฐ€ ์žˆ์„ ๋•Œ๋งŒ ๋‚˜๋ˆ•๋‹ˆ๋‹ค."
    ),
    gemini_api_key: Optional[str] = Query(
        default=None,
        description="Gemini API ํ‚ค (์š”์•ฝ ๋ชจ๋“œ์—์„œ ํ•„์ˆ˜)"
    )
):
    """
    Process a YouTube video via GET (query parameters).

    - **youtube_api_key**: YouTube API key (required)
    - **youtube_url**: YouTube URL (required)
    - **mode**: segment (split only) or summary (split + summarize)
    - **penalty**: major-topic split sensitivity (default: 5.0)
    - **threshold**: sub-topic split sensitivity (default: 90)
    - **gemini_api_key**: Gemini API key (needed in summary mode)
    """
    # Repackage the query parameters into the same model the POST endpoint
    # receives, so both routes share process_video.
    fields = {
        "youtube_api_key": youtube_api_key,
        "youtube_url": youtube_url,
        "mode": mode,
        "penalty": penalty,
        "threshold": threshold,
        "gemini_api_key": gemini_api_key,
    }
    payload = ProcessRequest(**fields)
    return await process_video(payload)
@app.post("/process", tags=["Processing"])
async def process_post(request: ProcessRequest):
    """
    Process a YouTube video via POST (JSON body).

    Request Body:
    ```json
    {
    "youtube_api_key": "your_youtube_api_key",
    "youtube_url": "https://youtube.com/watch?v=VIDEO_ID",
    "mode": "summary",
    "penalty": 5.0,
    "threshold": 90,
    "gemini_api_key": "your_gemini_api_key"
    }
    ```

    Response:
    - when mode is "segment": returns the array of segments
    - when mode is "summary": returns the array of segments including summaries
    """
    # The body is already a validated ProcessRequest; hand it straight to the
    # shared implementation.
    outcome = await process_video(request)
    return outcome
async def process_video(request: ProcessRequest):
    """Run segmentation, and optionally summarization, for one request.

    Shared implementation behind the GET and POST ``/process`` endpoints.

    Args:
        request: Validated request parameters.

    Returns:
        A JSONResponse whose body has ``status``, ``mode``, ``youtube_url``,
        ``total_segments`` and ``segments`` (the parsed contents of the JSON
        file produced by the segmenter or, in summary mode, the summarizer).

    Raises:
        HTTPException: 400 when summary mode is requested and no Gemini key is
            available from the request or the ``GEMINI_KEY`` environment
            variable; 500 for any other failure, with the error text as the
            detail.
    """
    # Remember what the key variables held before this request so the
    # caller-supplied keys can be rolled back afterwards; otherwise one
    # caller's keys would stay in the process environment and be picked up by
    # later requests that did not supply their own.
    key_names = ("YOUTUBE_API_KEY", "GEMINI_KEY")
    previous_env = {name: os.environ.get(name) for name in key_names}
    try:
        # Export the YouTube API key (required field) for the segmenter.
        os.environ['YOUTUBE_API_KEY'] = request.youtube_api_key
        # Export the Gemini API key (summary mode only).
        if request.mode == "summary":
            if request.gemini_api_key:
                os.environ['GEMINI_KEY'] = request.gemini_api_key
            elif not os.getenv('GEMINI_KEY'):
                raise HTTPException(
                    status_code=400,
                    detail="์š”์•ฝ ๋ชจ๋“œ์—์„œ๋Š” gemini_api_key๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. (ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋˜๋Š” ํŒŒ๋ผ๋ฏธํ„ฐ๋กœ ์ œ๊ณต)"
                )
        # Work inside a temporary directory that is removed on exit.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Step 1: split the video into segments.
            segment_file = process_youtube_video(
                request.youtube_url,
                penalty=request.penalty,
                threshold=request.threshold,
                output_dir=temp_dir
            )
            # Load the segment file.
            with open(segment_file, 'r', encoding='utf-8') as f:
                segment_data = json.load(f)
            # Segment-only mode stops here.
            if request.mode == "segment":
                return JSONResponse(content={
                    "status": "success",
                    "mode": "segment",
                    "youtube_url": request.youtube_url,
                    "total_segments": len(segment_data),
                    "segments": segment_data
                })
            # Step 2: generate summaries.
            summary_file = summarize_youtube_segments(
                segment_file,
                output_json_path=os.path.join(temp_dir, "summary.json"),
                gemini_api_key=request.gemini_api_key
            )
            # Load the summary file.
            with open(summary_file, 'r', encoding='utf-8') as f:
                summary_data = json.load(f)
            return JSONResponse(content={
                "status": "success",
                "mode": "summary",
                "youtube_url": request.youtube_url,
                "total_segments": len(summary_data),
                "segments": summary_data
            })
    except HTTPException:
        # Deliberate HTTP errors (the 400 above) must keep their status code
        # instead of being rewrapped as a 500 by the generic handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Nothing in this coroutine awaits, so the environment changes are not
        # interleaved with another request on the same event loop.
        # NOTE(review): revisit this if the helpers are moved to a thread pool.
        for name, value in previous_env.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
@app.get("/info", tags=["Information"])
async def api_info():
    """
    Return API information.

    Provides the project's purpose, tech stack, usage and similar details as
    a static JSON document.
    """
    # Each section is built separately, then assembled in the response's key
    # order.
    purpose = {
        "ko": "๊ธด ์œ ํŠœ๋ธŒ ์˜์ƒ์˜ ๋‚ด์šฉ์„ ๋น ๋ฅด๊ฒŒ ํŒŒ์•…ํ•˜๊ณ , ์›ํ•˜๋Š” ๋ถ€๋ถ„์œผ๋กœ ์ด๋™ํ•  ์ˆ˜ ์žˆ๋„๋ก ๋•์Šต๋‹ˆ๋‹ค.",
        "en": "Helps quickly understand long YouTube videos and navigate to desired sections.",
    }
    features = [
        "ํ†ต๊ณ„์  ๋ณ€ํ™”์  ํƒ์ง€ (ruptures PELT)",
        "์˜๋ฏธ ๊ธฐ๋ฐ˜ ์„ธ๊ทธ๋จผํŠธ ๋ถ„ํ•  (Sentence Transformers)",
        "ํƒ€์ž„์Šคํƒฌํ”„ ๋งคํ•‘",
        "AI ์š”์•ฝ ๋ฐ ์ฃผ์ œ ๋ผ๋ฒจ๋ง (Gemini 2.0 Flash Lite)",
    ]
    tech_stack = {
        "backend": "FastAPI",
        "ai_models": ["sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "gemini-2.0-flash-lite"],
        "algorithms": ["ruptures PELT", "Semantic Chunking"],
        "deployment": "Hugging Face Spaces",
    }
    endpoints = {
        "GET /process": "์œ ํŠœ๋ธŒ ์˜์ƒ ์ฒ˜๋ฆฌ (์ฟผ๋ฆฌ ํŒŒ๋ผ๋ฏธํ„ฐ ์‚ฌ์šฉ)",
        "POST /process": "์œ ํŠœ๋ธŒ ์˜์ƒ ์ฒ˜๋ฆฌ (JSON body ์‚ฌ์šฉ)",
        "GET /health": "ํ—ฌ์Šค ์ฒดํฌ",
        "GET /info": "API ์ •๋ณด",
    }
    info = {
        "project_name": "YouTube Video Analyzer",
        "version": "1.0.0",
        "description": "์œ ํŠœ๋ธŒ ์˜์ƒ์˜ ์ž๋ง‰์„ ์ฃผ์ œ๋ณ„๋กœ ๋ถ„๋ฆฌํ•˜๊ณ  AI๋กœ ์š”์•ฝํ•˜๋Š” ์„œ๋น„์Šค",
        "purpose": purpose,
        "features": features,
        "tech_stack": tech_stack,
        "endpoints": endpoints,
    }
    return info
if __name__ == "__main__":
    import uvicorn

    # Keep-alive timeout raised to 300 seconds (5 minutes).
    server_options = {
        "host": "0.0.0.0",
        "port": 7860,
        "timeout_keep_alive": 300,
    }
    uvicorn.run(app, **server_options)