Spaces:
Sleeping
Sleeping
Lily LLM API ์ฌ์ฉ์ ๊ฐ์ด๋
๐ ๋ชฉ์ฐจ
๐ ์์ํ๊ธฐ
์์คํ ์๊ตฌ์ฌํญ
์ต์ ์ฌ์:
- CPU: 4์ฝ์ด ์ด์
- RAM: 8GB ์ด์
- ์ ์ฅ๊ณต๊ฐ: 20GB ์ด์
- GPU: ์ ํ์ฌํญ (CUDA ์ง์ ์ ์ฑ๋ฅ ํฅ์)
๊ถ์ฅ ์ฌ์:
- CPU: 8์ฝ์ด ์ด์
- RAM: 16GB ์ด์
- ์ ์ฅ๊ณต๊ฐ: 50GB ์ด์
- GPU: NVIDIA RTX 3060 ์ด์ (CUDA ์ง์)
์ค์น ๋ฐ ์คํ
1. Docker๋ฅผ ์ฌ์ฉํ ๋ฐฐํฌ (๊ถ์ฅ)
# ์ ์ฅ์ ํด๋ก
git clone <repository-url>
cd lily_generate_package
# ๋ฐฐํฌ ์คํ
chmod +x scripts/deploy.sh
./scripts/deploy.sh deploy
# ์ํ ํ์ธ
./scripts/deploy.sh status
2. ๋ก์ปฌ ๊ฐ๋ฐ ํ๊ฒฝ
# ๊ฐ์ํ๊ฒฝ ์์ฑ
python -m venv venv
source venv/bin/activate # Windows: venv\Scripts\activate
# ์์กด์ฑ ์ค์น
pip install -r requirements.txt
# NLTK ๋ฐ์ดํฐ ๋ค์ด๋ก๋
python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab')"
# ์๋ฒ ์คํ
python run_server_v2.py
์ฒซ ๋ฒ์งธ ์์ฒญ
# ์๋ฒ ์ํ ํ์ธ
curl http://localhost:8001/health
# ๋ชจ๋ธ ๋ชฉ๋ก ์กฐํ
curl http://localhost:8001/models
# ๊ฐ๋จํ ํ
์คํธ ์์ฑ
curl -X POST http://localhost:8001/generate \
-H "Content-Type: application/x-www-form-urlencoded" \
-d "prompt=์๋
ํ์ธ์!&model_id=polyglot-ko-1.3b-chat&max_length=100"
๐ค ๊ธฐ๋ณธ ๊ธฐ๋ฅ
1. ํ ์คํธ ์์ฑ
๋จ์ ํ ์คํธ ์์ฑ
import requests
def generate_text(prompt, model_id="polyglot-ko-1.3b-chat"):
    """Generate text via the Lily LLM `/generate` endpoint.

    Args:
        prompt: Input text for the model.
        model_id: Model identifier (default: "polyglot-ko-1.3b-chat").

    Returns:
        Parsed JSON response dict (contains "generated_text").
    """
    url = "http://localhost:8001/generate"
    data = {
        "prompt": prompt,
        "model_id": model_id,
        "max_length": 200,
        "temperature": 0.7,
        "top_p": 0.9,
        "do_sample": True,
    }
    response = requests.post(url, data=data)
    return response.json()

# Usage example
result = generate_text("인공지능의 미래에 대해 설명해주세요.")
print(result["generated_text"])
ํ๋ผ๋ฏธํฐ ์ค๋ช
| ํ๋ผ๋ฏธํฐ | ์ค๋ช | ๊ธฐ๋ณธ๊ฐ | ๋ฒ์ |
|---|---|---|---|
prompt |
์ ๋ ฅ ํ ์คํธ | ํ์ | - |
model_id |
์ฌ์ฉํ ๋ชจ๋ธ | polyglot-ko-1.3b-chat | ์ฌ์ฉ ๊ฐ๋ฅํ ๋ชจ๋ธ ๋ชฉ๋ก |
max_length |
์ต๋ ํ ํฐ ์ | 200 | 1-4000 |
temperature |
์ฐฝ์์ฑ ์กฐ์ | 0.7 | 0.0-2.0 |
top_p |
๋์ ํ๋ฅ ์๊ณ๊ฐ | 0.9 | 0.0-1.0 |
do_sample |
์ํ๋ง ์ฌ์ฉ ์ฌ๋ถ | True | True/False |
2. ๋ฉํฐ๋ชจ๋ฌ ์ฒ๋ฆฌ
์ด๋ฏธ์ง์ ํ ์คํธ ํจ๊ป ์ฒ๋ฆฌ
def generate_multimodal(prompt, image_files, model_id="kanana-1.5-v-3b-instruct"):
    """Send a prompt plus one or more images to `/generate-multimodal`.

    Args:
        prompt: Text prompt describing the request.
        image_files: Iterable of image file paths (sent as JPEG parts).
        model_id: Multimodal model identifier.

    Returns:
        Parsed JSON response dict (contains "generated_text").
    """
    url = "http://localhost:8001/generate-multimodal"
    files = []
    for i, image_path in enumerate(image_files):
        # Read each image eagerly so no file handle is leaked
        # (the original passed open() objects that were never closed).
        with open(image_path, 'rb') as f:
            files.append(('image_files', (f'image_{i}.jpg', f.read(), 'image/jpeg')))
    data = {
        "prompt": prompt,
        "model_id": model_id,
        "max_length": 200,
        "temperature": 0.7
    }
    response = requests.post(url, files=files, data=data)
    return response.json()

# Usage example
result = generate_multimodal(
    "이 이미지에 대해 설명해주세요.",
    ["image1.jpg", "image2.jpg"]
)
print(result["generated_text"])
3. ์ฌ์ฉ์ ๊ด๋ฆฌ
์ฌ์ฉ์ ๋ฑ๋ก ๋ฐ ๋ก๊ทธ์ธ
def register_user(username, email, password):
    """Create a new account via `/auth/register`; returns the JSON response."""
    payload = {
        "username": username,
        "email": email,
        "password": password
    }
    resp = requests.post("http://localhost:8001/auth/register", data=payload)
    return resp.json()

def login_user(username, password):
    """Log in via `/auth/login`; returns the JSON response with tokens."""
    payload = {
        "username": username,
        "password": password
    }
    resp = requests.post("http://localhost:8001/auth/login", data=payload)
    return resp.json()

# Usage example
# 1. Register a user
register_result = register_user("testuser", "test@example.com", "password123")
access_token = register_result["access_token"]
# 2. Log in
login_result = login_user("testuser", "password123")
access_token = login_result["access_token"]
์ธ์ฆ์ด ํ์ํ ์์ฒญ
def authenticated_request(url, data, token):
    """POST *data* to *url* with a Bearer token; returns the JSON body.

    Args:
        url: Full endpoint URL.
        data: Form data dict to send.
        token: Access token obtained from login/register.
    """
    headers = {"Authorization": f"Bearer {token}"}
    response = requests.post(url, data=data, headers=headers)
    return response.json()

# Usage example
result = authenticated_request(
    "http://localhost:8001/generate",
    {"prompt": "안녕하세요!", "model_id": "polyglot-ko-1.3b-chat"},
    access_token
)
๐ ๊ณ ๊ธ ๊ธฐ๋ฅ
1. ๋ฌธ์ ์ฒ๋ฆฌ (RAG)
๋ฌธ์ ์ ๋ก๋
def upload_document(file_path, user_id, token=None):
    """Upload a document for RAG indexing; returns JSON with "document_id"."""
    url = "http://localhost:8001/document/upload"
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    with open(file_path, 'rb') as fh:
        response = requests.post(
            url,
            files={'file': fh},
            data={'user_id': user_id},
            headers=headers,
        )
    return response.json()

# Usage example
result = upload_document("document.pdf", "user123", access_token)
document_id = result["document_id"]
RAG ์ฟผ๋ฆฌ
def rag_query(query, user_id, token=None):
    """Run a RAG query over the user's uploaded documents.

    Returns JSON with "response" (the answer) and "sources" (citations).
    """
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    payload = {
        "query": query,
        "user_id": user_id,
        "max_length": 300,
        "temperature": 0.7,
    }
    resp = requests.post("http://localhost:8001/rag/generate",
                         data=payload, headers=headers)
    return resp.json()

# Usage example
result = rag_query("인공지능의 미래에 대해 알려주세요.", "user123", access_token)
print(result["response"])
print("출처:", result["sources"])
ํ์ด๋ธ๋ฆฌ๋ RAG (์ด๋ฏธ์ง + ๋ฌธ์)
def hybrid_rag_query(query, image_files, user_id, token=None):
    """Hybrid RAG query combining the user's documents with images.

    Args:
        query: Text query.
        image_files: Iterable of image file paths (sent as JPEG parts).
        user_id: Owner of the indexed documents.
        token: Optional access token for authenticated requests.

    Returns:
        Parsed JSON response dict.
    """
    url = "http://localhost:8001/rag/generate-hybrid"
    files = []
    for i, image_path in enumerate(image_files):
        # Read each image eagerly so no file handle is leaked
        # (the original passed open() objects that were never closed).
        with open(image_path, 'rb') as f:
            files.append(('image_files', (f'image_{i}.jpg', f.read(), 'image/jpeg')))
    data = {
        "query": query,
        "user_id": user_id,
        "max_length": 300,
        "temperature": 0.7
    }
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    response = requests.post(url, files=files, data=data, headers=headers)
    return response.json()
2. ์ฑํ ์ธ์ ๊ด๋ฆฌ
์ธ์ ์์ฑ ๋ฐ ๋ฉ์์ง ๊ด๋ฆฌ
def create_chat_session(user_id, session_name, token=None):
    """Create a chat session; returns JSON containing "session_id"."""
    url = "http://localhost:8001/session/create"
    data = {
        "user_id": user_id,
        "session_name": session_name
    }
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    response = requests.post(url, data=data, headers=headers)
    return response.json()

def add_chat_message(session_id, user_id, content, token=None):
    """Append a text message to an existing chat session."""
    url = "http://localhost:8001/chat/message"
    data = {
        "session_id": session_id,
        "user_id": user_id,
        "message_type": "text",
        "content": content
    }
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    response = requests.post(url, data=data, headers=headers)
    return response.json()

def get_chat_history(session_id, token=None):
    """Fetch the full message history of a chat session as a JSON list."""
    url = f"http://localhost:8001/chat/history/{session_id}"
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    response = requests.get(url, headers=headers)
    return response.json()

# Usage example
# 1. Create a session
session_result = create_chat_session("user123", "AI 상담", access_token)
session_id = session_result["session_id"]
# 2. Add a message
add_chat_message(session_id, "user123", "안녕하세요!", access_token)
# 3. Fetch the chat history
history = get_chat_history(session_id, access_token)
for message in history:
    print(f"{message['timestamp']}: {message['content']}")
3. ๋ฐฑ๊ทธ๋ผ์ด๋ ์์
๋ฌธ์ ์ฒ๋ฆฌ ์์
def start_document_processing(file_path, user_id, token=None):
    """Queue a background document-processing task; returns JSON with "task_id"."""
    url = "http://localhost:8001/tasks/document/process"
    data = {
        "file_path": file_path,
        "user_id": user_id
    }
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    response = requests.post(url, data=data, headers=headers)
    return response.json()

def check_task_status(task_id, token=None):
    """Fetch the current status JSON of a background task."""
    url = f"http://localhost:8001/tasks/{task_id}"
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    response = requests.get(url, headers=headers)
    return response.json()

# Usage example
# 1. Start a task
task_result = start_document_processing("/path/to/document.pdf", "user123", access_token)
task_id = task_result["task_id"]
# 2. Poll the task status every 5 seconds.
# NOTE(review): this polls indefinitely until SUCCESS/FAILURE —
# add a timeout or max-attempt limit in production code.
import time
while True:
    status = check_task_status(task_id, access_token)
    print(f"상태: {status['status']}, 진행률: {status.get('progress', 0)}%")
    if status['status'] in ['SUCCESS', 'FAILURE']:
        break
    time.sleep(5)
4. ๋ชจ๋ํฐ๋ง
์ฑ๋ฅ ๋ชจ๋ํฐ๋ง
def start_monitoring():
    """Start server-side performance monitoring; returns the JSON response."""
    url = "http://localhost:8001/monitoring/start"
    response = requests.post(url)
    return response.json()

def get_monitoring_status():
    """Return current monitoring metrics (JSON with "current_metrics")."""
    url = "http://localhost:8001/monitoring/status"
    response = requests.get(url)
    return response.json()

def get_system_health():
    """Return overall system health (JSON with "status" and "recommendations")."""
    url = "http://localhost:8001/monitoring/health"
    response = requests.get(url)
    return response.json()

# Usage example
# 1. Start monitoring
start_monitoring()
# 2. Check current metrics
status = get_monitoring_status()
print(f"CPU 사용률: {status['current_metrics']['cpu_percent']}%")
print(f"메모리 사용률: {status['current_metrics']['memory_percent']}%")
# 3. System health
health = get_system_health()
print(f"시스템 상태: {health['status']}")
for recommendation in health['recommendations']:
    print(f"권장사항: {recommendation}")
๐ WebSocket ์ค์๊ฐ ์ฑํ
WebSocket ํด๋ผ์ด์ธํธ
// WebSocket client for the Lily LLM real-time chat endpoint.
class LilyLLMWebSocket {
    constructor(userId) {
        this.userId = userId;
        this.ws = null;
        this.messageHandlers = [];
    }

    // Open the connection and wire up all lifecycle callbacks.
    connect() {
        this.ws = new WebSocket(`ws://localhost:8001/ws/${this.userId}`);
        this.ws.onopen = () => {
            console.log('WebSocket 연결됨');
        };
        this.ws.onmessage = (event) => {
            const data = JSON.parse(event.data);
            this.handleMessage(data);
        };
        this.ws.onclose = () => {
            console.log('WebSocket 연결 종료');
        };
        this.ws.onerror = (error) => {
            console.error('WebSocket 오류:', error);
        };
    }

    // Send a chat message only when the socket is open.
    sendMessage(message, sessionId) {
        if (this.ws && this.ws.readyState === WebSocket.OPEN) {
            this.ws.send(JSON.stringify({
                type: 'chat',
                message: message,
                session_id: sessionId
            }));
        }
    }

    // Register a callback invoked for every incoming message.
    addMessageHandler(handler) {
        this.messageHandlers.push(handler);
    }

    handleMessage(data) {
        this.messageHandlers.forEach(handler => handler(data));
    }

    disconnect() {
        if (this.ws) {
            this.ws.close();
        }
    }
}

// Usage example
const wsClient = new LilyLLMWebSocket('user123');
wsClient.connect();
wsClient.addMessageHandler((data) => {
    console.log('메시지 수신:', data);
});
wsClient.sendMessage('안녕하세요!', 'session123');
๐จ ๋ฌธ์ ํด๊ฒฐ
์ผ๋ฐ์ ์ธ ๋ฌธ์ ๋ค
1. ์๋ฒ ์ฐ๊ฒฐ ์คํจ
์ฆ์: Connection refused ๋๋ Failed to establish a new connection
ํด๊ฒฐ ๋ฐฉ๋ฒ:
# ์๋ฒ ์ํ ํ์ธ
curl http://localhost:8001/health
# ์๋ฒ ์ฌ์์
./scripts/deploy.sh restart
# ๋ก๊ทธ ํ์ธ
./scripts/deploy.sh logs
2. ๋ฉ๋ชจ๋ฆฌ ๋ถ์กฑ
์ฆ์: Out of memory ๋๋ ์๋ต ์๋ ์ ํ
ํด๊ฒฐ ๋ฐฉ๋ฒ:
# ๋ฉ๋ชจ๋ฆฌ ์ฌ์ฉ๋ ํ์ธ
docker stats
# ๋ถํ์ํ ์ปจํ
์ด๋ ์ ๋ฆฌ
docker system prune -f
# ๋ฆฌ์์ค ์ ํ ์ค์ (docker-compose.yml)
services:
lily-llm-api:
deploy:
resources:
limits:
memory: 4G
3. ๋ชจ๋ธ ๋ก๋ฉ ์คํจ
์ฆ์: Model not found ๋๋ ๋ชจ๋ธ ๊ด๋ จ ์ค๋ฅ
ํด๊ฒฐ ๋ฐฉ๋ฒ:
# ๋ชจ๋ธ ๋ชฉ๋ก ํ์ธ
curl http://localhost:8001/models
# ๋ชจ๋ธ ํ์ผ ํ์ธ
ls -la models/
# ์๋ฒ ์ฌ์์
./scripts/deploy.sh restart
4. ์ธ์ฆ ์ค๋ฅ
์ฆ์: 401 Unauthorized ๋๋ 403 Forbidden
ํด๊ฒฐ ๋ฐฉ๋ฒ:
# ํ ํฐ ๊ฐฑ์
def refresh_token(refresh_token):
    """Exchange a refresh token for a new access/refresh token pair."""
    payload = {"refresh_token": refresh_token}
    resp = requests.post("http://localhost:8001/auth/refresh", data=payload)
    return resp.json()

# Retry the request with the refreshed token
new_tokens = refresh_token(old_refresh_token)
access_token = new_tokens["access_token"]
์ฑ๋ฅ ์ต์ ํ
1. ๋ฐฐ์น ์ฒ๋ฆฌ
def batch_generate_texts(prompts, model_id="polyglot-ko-1.3b-chat"):
    """Generate text for each prompt sequentially.

    Args:
        prompts: Iterable of prompt strings.
        model_id: Model identifier passed through to generate_text().

    Returns:
        List of JSON response dicts, one per prompt (order preserved).
    """
    return [generate_text(prompt, model_id) for prompt in prompts]

# Usage example
prompts = [
    "첫 번째 질문입니다.",
    "두 번째 질문입니다.",
    "세 번째 질문입니다."
]
results = batch_generate_texts(prompts)
2. ์บ์ฑ ํ์ฉ
import hashlib
import json

import redis
class CachedLilyLLMClient:
    """Client that caches `/generate` responses in Redis for one hour."""

    def __init__(self, base_url="http://localhost:8001"):
        self.base_url = base_url
        self.redis_client = redis.Redis(host='localhost', port=6379, db=0)

    def generate_text_with_cache(self, prompt, model_id="polyglot-ko-1.3b-chat"):
        """Return a cached generation result, calling the API on a miss.

        The cache key uses a SHA-256 digest: the original used the built-in
        hash(), whose value changes between interpreter runs
        (PYTHONHASHSEED), so cached entries could never be hit again after
        a restart.
        """
        digest = hashlib.sha256(f"{prompt}{model_id}".encode("utf-8")).hexdigest()
        cache_key = f"text_gen:{digest}"
        # Check the cache first.
        cached_result = self.redis_client.get(cache_key)
        if cached_result:
            return json.loads(cached_result)
        # Cache miss: call the API and store the result for 1 hour.
        result = generate_text(prompt, model_id)
        self.redis_client.setex(cache_key, 3600, json.dumps(result))
        return result
๐ ๋ชจ๋ฒ ์ฌ๋ก
1. ์๋ฌ ์ฒ๋ฆฌ
import requests
from requests.exceptions import RequestException
def safe_api_call(func, *args, **kwargs):
    """Call *func*, returning None (after printing a message) on failure.

    Network errors (requests.RequestException) are reported separately
    from other unexpected exceptions.
    """
    try:
        return func(*args, **kwargs)
    except RequestException as e:
        print(f"네트워크 오류: {e}")
        return None
    except Exception as e:
        print(f"예상치 못한 오류: {e}")
        return None

# Usage example
result = safe_api_call(generate_text, "안녕하세요!")
if result:
    print(result["generated_text"])
2. ์ฌ์๋ ๋ก์ง
import time
from functools import wraps
def retry_on_failure(max_retries=3, delay=1):
    """Decorator factory: retry the wrapped function on any exception.

    Args:
        max_retries: Total number of attempts before giving up.
        delay: Seconds to sleep between attempts.

    The final failure is re-raised with its original traceback.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except Exception:
                    if attempt == max_retries - 1:
                        # Bare raise preserves the original traceback
                        # (the original's `raise e` re-anchored it here).
                        raise
                    print(f"시도 {attempt + 1} 실패, {delay}초 후 재시도...")
                    time.sleep(delay)
            return None
        return wrapper
    return decorator

# Usage example
@retry_on_failure(max_retries=3, delay=2)
def robust_generate_text(prompt):
    return generate_text(prompt)
3. ๋น๋๊ธฐ ์ฒ๋ฆฌ
import asyncio
import aiohttp
async def async_generate_text(session, prompt, model_id="polyglot-ko-1.3b-chat"):
    """Generate text asynchronously using an aiohttp session."""
    payload = {
        "prompt": prompt,
        "model_id": model_id,
        "max_length": 200,
        "temperature": 0.7,
    }
    async with session.post("http://localhost:8001/generate", data=payload) as resp:
        return await resp.json()

async def batch_generate_async(prompts):
    """Fan out one generation request per prompt and gather all results."""
    async with aiohttp.ClientSession() as http:
        pending = [async_generate_text(http, p) for p in prompts]
        return await asyncio.gather(*pending)

# Usage example
prompts = ["질문1", "질문2", "질문3"]
results = asyncio.run(batch_generate_async(prompts))
4. ๋ก๊น
import logging
# ๋ก๊น
์ค์
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('lily_llm_client.log'),
logging.StreamHandler()
]
)
logger = logging.getLogger(__name__)
def generate_text_with_logging(prompt, model_id="polyglot-ko-1.3b-chat"):
logger.info(f"ํ
์คํธ ์์ฑ ์์: {prompt[:50]}...")
try:
result = generate_text(prompt, model_id)
logger.info(f"ํ
์คํธ ์์ฑ ์ฑ๊ณต: {len(result['generated_text'])} ๋ฌธ์")
return result
except Exception as e:
logger.error(f"ํ
์คํธ ์์ฑ ์คํจ: {e}")
raise
๐ ์ง์
๋์๋ง ๋ฆฌ์์ค
- API ๋ฌธ์:
http://localhost:8001/docs - ReDoc ๋ฌธ์:
http://localhost:8001/redoc - GitHub Issues: ํ๋ก์ ํธ ์ ์ฅ์์ Issues ์น์
- ๋ก๊ทธ ํ์ผ:
./logs/๋๋ ํ ๋ฆฌ
๋๋ฒ๊น ํ
- ๋ก๊ทธ ํ์ธ: ํญ์ ๋ก๊ทธ๋ฅผ ๋จผ์ ํ์ธํ์ธ์
- ๋จ๊ณ๋ณ ํ ์คํธ: ๋ณต์กํ ์์ฒญ์ ์์ ๋จ์๋ก ๋๋์ด ํ ์คํธํ์ธ์
- ๋คํธ์ํฌ ํ์ธ: ๋ฐฉํ๋ฒฝ์ด๋ ํ๋ก์ ์ค์ ์ ํ์ธํ์ธ์
- ๋ฆฌ์์ค ๋ชจ๋ํฐ๋ง: CPU, ๋ฉ๋ชจ๋ฆฌ, ๋์คํฌ ์ฌ์ฉ๋์ ์ฃผ๊ธฐ์ ์ผ๋ก ํ์ธํ์ธ์
์ฑ๋ฅ ํ
- ์ ์ ํ ๋ชจ๋ธ ์ ํ: ์์ ์ ๋ง๋ ๋ชจ๋ธ์ ์ ํํ์ธ์
- ๋ฐฐ์น ์ฒ๋ฆฌ: ์ฌ๋ฌ ์์ฒญ์ ํ ๋ฒ์ ์ฒ๋ฆฌํ์ธ์
- ์บ์ฑ ํ์ฉ: ๋ฐ๋ณต๋๋ ์์ฒญ์ ์บ์๋ฅผ ์ฌ์ฉํ์ธ์
- ๋น๋๊ธฐ ์ฒ๋ฆฌ: ๋๋์ ์์ฒญ์ ๋น๋๊ธฐ๋ก ์ฒ๋ฆฌํ์ธ์