# Spaces: Paused
# NOTE(review): the line above looks like page-header residue from a web capture, not program code.
import hashlib
import hmac
import os
import time
import traceback
import uuid
from datetime import datetime

from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, HTMLResponse
from huggingface_hub import InferenceClient, hf_hub_download
from llama_cpp import Llama
# ============ Configuration ============
# Token passed to hf_hub_download and to every InferenceClient below.
HF_TOKEN = os.getenv("HF_TOKEN")
# Bearer token that callers must present; defaults to HF_TOKEN when API_KEY is unset.
API_KEY = os.getenv("API_KEY", HF_TOKEN)
# ============ Local model - LFM2-8B-A1B (GGUF, CPU) ============
print("🔄 Baixando e carregando LFM2-8B-A1B (GGUF)...")

# GGUF build to download (Q4_K_M quantisation).
REPO_ID = "bartowski/LiquidAI_LFM2-8B-A1B-GGUF"
FILENAME = "LiquidAI_LFM2-8B-A1B-Q4_K_M.gguf"

try:
    # Fetch the weights file and remember where it was stored.
    model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, token=HF_TOKEN)
    print(f"✅ Modelo baixado em: {model_path}")

    # Load the model through llama.cpp.
    chat_model = Llama(
        model_path=model_path,
        n_ctx=4096,    # context window
        n_threads=8,   # CPU threads
        n_batch=512,
        verbose=False,
    )
    print("✅ LFM2-8B-A1B carregado com sucesso na memória!")
except Exception as exc:
    # Best effort: the service still starts; generate_local_chat reports the
    # missing model to callers instead.
    print(f"❌ Erro ao carregar modelo: {exc}")
    chat_model = None
# ============ Model clients (Inference API) ============
# One InferenceClient per remote task, all authenticated with HF_TOKEN.
(
    vision_client,      # image analysis
    embed_client,       # semantic vectors
    classify_client,    # zero-shot classification (multilingual)
    summarize_client,   # summarisation (multilingual)
    sentiment_client,   # sentiment analysis (multilingual)
) = (
    InferenceClient(token=HF_TOKEN, model=model_id)
    for model_id in (
        "google/gemma-3-27b-it",
        "BAAI/bge-m3",
        "joeddav/xlm-roberta-large-xnli",
        "csebuetnlp/mT5_multilingual_XLSum",
        "lxyuan/distilbert-base-multilingual-cased-sentiments-student",
    )
)
# ============ Local chat function ============
def generate_local_chat(messages, max_tokens=1024, temperature=0.7):
    """Generate a reply with the local GGUF model.

    Args:
        messages: Chat messages forwarded unchanged to
            ``chat_model.create_chat_completion``.
        max_tokens: Forwarded as ``max_tokens``.
        temperature: Forwarded as ``temperature``.

    Returns:
        The ``content`` of the first choice, or a fixed error string when the
        module-level ``chat_model`` is falsy (model failed to load).
    """
    if not chat_model:
        return "Erro: Modelo não carregado."

    # create_chat_completion applies the chat template itself.
    generation_args = {
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stop": ["<|im_end|>", "<|endoftext|>"],
    }
    completion = chat_model.create_chat_completion(**generation_args)
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]
# ============ Cache ============
# In-memory store: key -> {"response": ..., "timestamp": ...}.
response_cache = dict()
# Entry count at which set_cached_response starts evicting.
CACHE_MAX_SIZE = 500
# Entries older than this many seconds are treated as expired (one hour).
CACHE_TTL_SECONDS = 60 * 60
def get_cache_key(content, task):
    """Return the MD5 hex digest of ``str(content)`` followed by ``task``.

    Args:
        content: Any object; its ``str()`` form is hashed.
        task: String appended to the content before hashing.
    """
    raw = "".join((str(content), task))
    digest = hashlib.md5(raw.encode())
    return digest.hexdigest()
def get_cached_response(key):
    """Return the cached response for ``key`` or ``None``.

    An entry whose age has reached ``CACHE_TTL_SECONDS`` is removed from
    ``response_cache`` and reported as a miss.
    """
    entry = response_cache.get(key)
    if entry is None:
        return None

    age = time.time() - entry["timestamp"]
    if age < CACHE_TTL_SECONDS:
        return entry["response"]

    # Expired: drop it so it no longer counts towards the cache size.
    del response_cache[key]
    return None
def set_cached_response(key, response):
    """Store ``response`` under ``key`` with the current time as its timestamp.

    When the cache already holds ``CACHE_MAX_SIZE`` entries and ``key`` is new,
    the entry with the smallest timestamp is evicted first. Overwriting an
    existing key never evicts another entry, because the size does not grow.
    """
    is_new_key = key not in response_cache
    # The truthiness check keeps min() from raising on an empty cache
    # (possible if CACHE_MAX_SIZE is configured as 0).
    if is_new_key and response_cache and len(response_cache) >= CACHE_MAX_SIZE:
        oldest_key = min(response_cache, key=lambda k: response_cache[k]["timestamp"])
        del response_cache[oldest_key]
    response_cache[key] = {"response": response, "timestamp": time.time()}
def verify_api_key(request: Request) -> bool:
    """Check the request's ``Authorization: Bearer <key>`` header against ``API_KEY``.

    Returns:
        True only when ``API_KEY`` is configured (non-empty) and the header
        carries exactly that key after the ``"Bearer "`` prefix.
    """
    # With no key configured nothing may authenticate. Without this guard an
    # empty API_KEY would be matched by the header value "Bearer ".
    if not API_KEY:
        return False

    prefix = "Bearer "
    auth = request.headers.get("Authorization", "")
    if not auth.startswith(prefix):
        return False

    # Constant-time comparison; bytes are used so non-ASCII input cannot make
    # compare_digest raise.
    presented = auth[len(prefix):].encode("utf-8")
    return hmac.compare_digest(presented, API_KEY.encode("utf-8"))
def has_image_content(messages):
    """Return True if any message has a list ``content`` containing an ``image_url`` part.

    Args:
        messages: Iterable of message dicts. ``content`` values that are not
            lists are ignored, as are list items that are not dicts.
    """
    def _is_image_part(part):
        return isinstance(part, dict) and part.get("type") == "image_url"

    for message in messages:
        parts = message.get("content", [])
        if isinstance(parts, list) and any(_is_image_part(part) for part in parts):
            return True
    return False
# ============ FastAPI ============
# Application object that the route handlers and middleware attach to.
app = FastAPI(
    version="4.1.0",
    title="DGGirl Multi-Modal API",
    description="API compatível com OpenAI para chat, visão, embeddings, classificação, sumarização e sentimento",
)

# CORS: every origin, method and header is allowed.
_allow_all = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=list(_allow_all),
    allow_methods=list(_allow_all),
    allow_headers=list(_allow_all),
)
del _allow_all
# ============ Home page ============
# Registered on "/" with HTMLResponse so the returned string is served as
# text/html instead of being JSON-encoded.
@app.get("/", response_class=HTMLResponse)
async def home():
    """Return the static landing page.

    The page lists the public endpoints and active models, shows the current
    number of entries in ``response_cache``, and links to ``/docs`` and
    ``/health``.
    """
    endpoints_html = """
<div class="endpoint"><span class="method">POST</span> <code>/v1/chat/completions</code><p>💬 Chat inteligente (LFM2-8B GGUF) + Visão (Gemma 3)</p></div>
<div class="endpoint"><span class="method">POST</span> <code>/v1/embeddings</code><p>🔢 Vetores semânticos para RAG (BGE-M3)</p></div>
<div class="endpoint"><span class="method">POST</span> <code>/v1/classify</code><p>🏷️ Classificação zero-shot de textos</p></div>
<div class="endpoint"><span class="method">POST</span> <code>/v1/summarize</code><p>📝 Resumir textos longos</p></div>
<div class="endpoint"><span class="method">POST</span> <code>/v1/sentiment</code><p>😊 Análise de sentimento</p></div>
"""
    return f"""
<!DOCTYPE html>
<html>
<head>
<title>DGGirl API v4.1</title>
<style>
body {{ font-family: 'Segoe UI', Tahoma, sans-serif; max-width: 900px; margin: 40px auto; padding: 20px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); min-height: 100vh; }}
.container {{ background: rgba(255,255,255,0.95); padding: 40px; border-radius: 20px; box-shadow: 0 10px 40px rgba(0,0,0,0.3); }}
h1 {{ color: #1a73e8; border-bottom: 3px solid #4285f4; padding-bottom: 15px; margin-bottom: 20px; }}
.status {{ background: linear-gradient(135deg, #00c853, #69f0ae); color: white; padding: 8px 16px; border-radius: 25px; font-weight: bold; font-size: 0.9em; display: inline-block; }}
.endpoint {{ background: #f8f9fa; padding: 18px; margin: 12px 0; border-radius: 12px; border-left: 6px solid #4285f4; transition: transform 0.2s; }}
.endpoint:hover {{ transform: translateX(5px); background: #e8f0fe; }}
.method {{ background: #d93025; color: white; padding: 4px 10px; border-radius: 5px; font-weight: bold; font-size: 0.85em; }}
code {{ background: #e8eaed; padding: 4px 10px; border-radius: 6px; font-family: 'Consolas', monospace; font-size: 0.95em; }}
.models {{ background: #e3f2fd; padding: 20px; border-radius: 12px; margin-top: 20px; }}
.models h3 {{ margin-top: 0; color: #1565c0; }}
.model-tag {{ display: inline-block; background: #1a73e8; color: white; padding: 5px 12px; border-radius: 15px; margin: 4px; font-size: 0.85em; }}
a {{ color: #1a73e8; text-decoration: none; }}
a:hover {{ text-decoration: underline; }}
.stats {{ display: flex; gap: 20px; margin-top: 20px; }}
.stat {{ background: #fff3e0; padding: 15px; border-radius: 10px; flex: 1; text-align: center; }}
.stat-value {{ font-size: 1.5em; font-weight: bold; color: #e65100; }}
</style>
</head>
<body>
<div class="container">
<h1>🤖 DGGirl API v4.1 - CPU Optimized</h1>
<p>Status: <span class="status">● OPERACIONAL</span></p>
{endpoints_html}
<div class="models">
<h3>🧠 Modelos Ativos</h3>
<span class="model-tag">LFM2-8B-A1B (GGUF Q4)</span>
<span class="model-tag">Gemma 3 27B Vision</span>
<span class="model-tag">BGE-M3 Embeddings</span>
<span class="model-tag">XLM-RoBERTa Classification</span>
<span class="model-tag">mT5 Summarization</span>
<span class="model-tag">DistilBERT Sentiment</span>
</div>
<div class="stats">
<div class="stat">
<div class="stat-value">{len(response_cache)}</div>
<div>Cache Items</div>
</div>
<div class="stat">
<div class="stat-value">6</div>
<div>Endpoints</div>
</div>
</div>
<p style="margin-top: 25px; text-align: center;">
<a href="/docs">📚 Documentação Swagger</a> |
<a href="/health">❤️ Health Check</a>
</p>
</div>
</body>
</html>
"""
# ============ Chat Completions (text + vision) ============
def _build_vision_messages(raw_messages):
    """Build the message list sent to the vision model.

    Only the last ``user`` message is used. When its ``content`` is a list,
    the image parts are kept and the text parts are joined into one trailing
    text part (a default prompt is used when there is no text). When
    ``content`` is not a list, ``raw_messages`` is passed through unchanged.

    Returns:
        The message list, or ``None`` when there is no user message.
    """
    last_user_msg = next(
        (msg for msg in reversed(raw_messages) if msg.get("role") == "user"),
        None,
    )
    if not last_user_msg:
        return None

    content = last_user_msg.get("content", [])
    if not isinstance(content, list):
        return raw_messages

    image_parts = []
    text_parts = []
    for item in content:
        if not isinstance(item, dict):
            continue
        part_type = item.get("type")
        if part_type == "text":
            text_parts.append(item.get("text", ""))
        elif part_type == "image_url":
            url = item.get("image_url", {}).get("url", "")
            if url:
                image_parts.append({"type": "image_url", "image_url": {"url": url}})

    final_text = " ".join(text_parts) if text_parts else "Analise a imagem."
    return [{
        "role": "user",
        "content": image_parts + [{"type": "text", "text": final_text}],
    }]


@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """OpenAI-style chat completion endpoint.

    Requests with ``model == "vision"`` or with an ``image_url`` part go to
    ``vision_client``; everything else goes to the local model through
    ``generate_local_chat``. Text responses are cached.

    Returns:
        A ``chat.completion`` dict on success; a JSON error with status 401
        (bad API key), 400 (missing messages / no user message) or 500
        (unexpected failure).
    """
    if not verify_api_key(request):
        return JSONResponse(status_code=401, content={"error": "Invalid API key"})
    try:
        body = await request.json()
        raw_messages = body.get("messages", [])
        if not isinstance(raw_messages, list) or not raw_messages:
            return JSONResponse(status_code=400, content={"error": "messages is required"})
        model = body.get("model", "auto")
        max_tokens = body.get("max_tokens", 1024)
        temperature = body.get("temperature", 0.7)

        # Decide whether the request needs the vision model.
        has_vision = model == "vision" or has_image_content(raw_messages)
        model_used = "google/gemma-3-27b-it" if has_vision else "LiquidAI/LFM2-8B-A1B-GGUF"

        # Cache (text only). The generation parameters are part of the key so
        # a request with different max_tokens/temperature is not answered
        # from an entry produced with other settings.
        cache_key = get_cache_key([raw_messages, max_tokens, temperature], model_used)
        if not has_vision:
            cached = get_cached_response(cache_key)
            if cached:
                return cached

        cacheable = not has_vision
        if has_vision:
            messages = _build_vision_messages(raw_messages)
            if messages is None:
                return JSONResponse(status_code=400, content={"error": "No user message"})
            response = vision_client.chat_completion(
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
            )
            response_content = response.choices[0].message.content
        else:
            # Local GGUF model for text.
            # NOTE(review): this call is synchronous inside an async handler.
            if not chat_model:
                # generate_local_chat will return its "model not loaded"
                # message; that must not be cached as a real answer.
                cacheable = False
            try:
                response_content = generate_local_chat(
                    messages=raw_messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
                )
            except Exception as e:
                # Best effort kept from the original: report the failure as
                # the assistant text, but never cache it.
                response_content = f"Error generating response: {str(e)}"
                cacheable = False

        result = {
            "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model_used,
            "choices": [{
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": response_content
                },
                "finish_reason": "stop"
            }],
            # Token accounting is not available here; zeros are placeholders.
            "usage": {
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "total_tokens": 0
            }
        }
        if cacheable:
            set_cached_response(cache_key, result)
        return result
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e), "detail": traceback.format_exc()})
# ============ Embeddings ============
@app.post("/v1/embeddings")
async def create_embeddings(request: Request):
    """OpenAI-style embeddings endpoint backed by ``embed_client``.

    The JSON body's ``input`` may be one string or a list of strings; one
    embedding is returned per string, in order.

    Returns:
        A ``list`` object with one ``embedding`` item per input; a JSON error
        with status 401 (bad API key), 400 (empty or non-string input) or 500.
    """
    if not verify_api_key(request):
        return JSONResponse(status_code=401, content={"error": "Invalid API key"})
    try:
        body = await request.json()
        input_text = body.get("input", "")
        texts = input_text if isinstance(input_text, list) else [input_text]

        # Reject bad input before any remote call; non-strings would
        # otherwise fail later in the usage computation.
        if not texts or not all(isinstance(t, str) and t for t in texts):
            return JSONResponse(
                status_code=400,
                content={"error": "Input must be a non-empty string or a list of non-empty strings"},
            )

        embeddings_data = []
        for idx, text in enumerate(texts):
            res = embed_client.feature_extraction(text)
            # Array-like results are converted to plain lists for JSON.
            embedding = res.tolist() if hasattr(res, 'tolist') else res
            embeddings_data.append({
                "object": "embedding",
                "index": idx,
                "embedding": embedding
            })

        # Whitespace word count, used as an approximation of token usage.
        token_count = sum(len(t.split()) for t in texts)
        return {
            "object": "list",
            "data": embeddings_data,
            "model": "bge-m3",
            "usage": {"prompt_tokens": token_count, "total_tokens": token_count}
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e), "detail": traceback.format_exc()})
# ============ Zero-shot classification ============
def _classification_field(item, name, default=None):
    """Read ``name`` from a result element that may be a dict or an object."""
    if isinstance(item, dict):
        return item.get(name, default)
    return getattr(item, name, default)


@app.post("/v1/classify")
async def classify_text(request: Request):
    """Classify ``text`` against candidate ``labels`` with ``classify_client``.

    Body fields: ``text`` (required), ``labels`` (defaults to
    positive/negative/neutral) and ``multi_label`` (defaults to False).

    Returns:
        A ``classification`` dict with labels and scores ordered by
        descending score and the top label as ``predicted_label``; a JSON
        error with status 401, 400 or 500.
    """
    if not verify_api_key(request):
        return JSONResponse(status_code=401, content={"error": "Invalid API key"})
    try:
        body = await request.json()
        text = body.get("text", "")
        labels = body.get("labels", ["positive", "negative", "neutral"])
        multi_label = body.get("multi_label", False)
        if not text:
            return JSONResponse(status_code=400, content={"error": "Text is required"})
        if not isinstance(labels, list) or not labels:
            return JSONResponse(status_code=400, content={"error": "Labels must be a non-empty list"})

        # Cache. multi_label changes the scores, so it is part of the key.
        cache_key = get_cache_key([text, labels, bool(multi_label)], "classify")
        cached = get_cached_response(cache_key)
        if cached:
            return cached

        result = classify_client.zero_shot_classification(
            text,
            labels,
            multi_label=multi_label
        )

        if hasattr(result, 'labels'):
            # Single object exposing parallel ``labels`` / ``scores`` lists.
            ranked_labels = list(result.labels)
            ranked_scores = list(getattr(result, 'scores', []))
        elif isinstance(result, list):
            # List of per-label elements, each carrying ``label`` and ``score``.
            pairs = [
                (
                    _classification_field(item, "label"),
                    _classification_field(item, "score", 0.0) or 0.0,
                )
                for item in result
            ]
            pairs.sort(key=lambda pair: pair[1], reverse=True)
            ranked_labels = [label for label, _ in pairs]
            ranked_scores = [score for _, score in pairs]
        else:
            # Unknown result shape: echo the requested labels without scores.
            ranked_labels = labels
            ranked_scores = []

        response = {
            "object": "classification",
            "text": text,
            "labels": ranked_labels,
            "scores": ranked_scores,
            "predicted_label": ranked_labels[0] if ranked_scores and ranked_labels else None,
            "model": "xlm-roberta-large-xnli"
        }
        set_cached_response(cache_key, response)
        return response
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e), "detail": traceback.format_exc()})
# ============ Summarisation ============
@app.post("/v1/summarize")
async def summarize_text(request: Request):
    """Summarise ``text`` with ``summarize_client``.

    Body fields: ``text`` (required), ``max_length`` (default 150) and
    ``min_length`` (default 30).

    Returns:
        A ``summarization`` dict with the summary, both lengths in characters
        and the summary/original ratio as a percentage; a JSON error with
        status 401, 400 or 500.
    """
    if not verify_api_key(request):
        return JSONResponse(status_code=401, content={"error": "Invalid API key"})
    try:
        body = await request.json()
        text = body.get("text", "")
        max_length = body.get("max_length", 150)
        min_length = body.get("min_length", 30)
        if not text:
            return JSONResponse(status_code=400, content={"error": "Text is required"})

        # Cache. The length limits shape the summary, so they are part of the key.
        cache_key = get_cache_key([text, max_length, min_length], "summarize")
        cached = get_cached_response(cache_key)
        if cached:
            return cached

        # NOTE(review): whether ``parameters`` is still accepted depends on the
        # installed huggingface_hub version - confirm against the pinned release.
        result = summarize_client.summarization(
            text,
            parameters={"max_length": max_length, "min_length": min_length}
        )
        # Accept either an object exposing ``summary_text`` or any other value.
        summary = result.summary_text if hasattr(result, 'summary_text') else str(result)

        response = {
            "object": "summarization",
            "original_length": len(text),
            "summary": summary,
            "summary_length": len(summary),
            # ``text`` is non-empty here, so the division is safe.
            "compression_ratio": round(len(summary) / len(text) * 100, 2),
            "model": "mt5-multilingual"
        }
        set_cached_response(cache_key, response)
        return response
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e), "detail": traceback.format_exc()})
# ============ Sentiment analysis ============
def _sentiment_field(item, name, default=None):
    """Read ``name`` from a result element that may be a dict or an object."""
    if isinstance(item, dict):
        return item.get(name, default)
    return getattr(item, name, default)


@app.post("/v1/sentiment")
async def analyze_sentiment(request: Request):
    """Classify the sentiment of ``text`` with ``sentiment_client``.

    Returns:
        A ``sentiment`` dict with the highest-scoring label (mapped to
        Portuguese when known), the raw label, its score rounded to four
        places and every label/score pair; a JSON error with status 401,
        400 or 500.
    """
    if not verify_api_key(request):
        return JSONResponse(status_code=401, content={"error": "Invalid API key"})
    try:
        body = await request.json()
        text = body.get("text", "")
        if not text:
            return JSONResponse(status_code=400, content={"error": "Text is required"})

        # Cache
        cache_key = get_cache_key(text, "sentiment")
        cached = get_cached_response(cache_key)
        if cached:
            return cached

        result = sentiment_client.text_classification(text)

        # Raw label -> Portuguese label; lookups are case-insensitive.
        label_map = {
            "positive": "positivo",
            "negative": "negativo",
            "neutral": "neutro",
            "1 star": "negativo",
            "5 stars": "positivo"
        }

        if isinstance(result, list) and len(result) > 0:
            # Every element is read the same way, and the winner is chosen by
            # score rather than by position in the list.
            scored = [
                (
                    _sentiment_field(item, "label", str(item)),
                    _sentiment_field(item, "score", 0.0) or 0.0,
                )
                for item in result
            ]
            label, score = max(scored, key=lambda pair: pair[1])
            all_scores = [{"label": lbl, "score": round(val, 4)} for lbl, val in scored]
        else:
            label = str(result)
            score = 1.0
            all_scores = []

        response = {
            "object": "sentiment",
            "text": text,
            "sentiment": label_map.get(str(label).lower(), label),
            "sentiment_raw": label,
            "confidence": round(score, 4),
            "all_scores": all_scores,
            "model": "distilbert-base-multilingual"
        }
        set_cached_response(cache_key, response)
        return response
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e), "detail": traceback.format_exc()})
# ============ Auxiliary endpoints ============
# NOTE(review): no route was visible for this handler; "/v1/models" is assumed
# from the OpenAI-compatible layout of the other paths - confirm.
@app.get("/v1/models")
async def list_models():
    """Return the static catalogue of models exposed by this service."""
    return {
        "object": "list",
        "data": [
            {"id": "lfm2-8b-gguf", "object": "model", "owned_by": "liquidai", "description": "Chat rápido (GGUF Q4)"},
            {"id": "gemma-3-vision", "object": "model", "owned_by": "google", "description": "Análise de imagens"},
            {"id": "bge-m3", "object": "model", "owned_by": "baai", "description": "Embeddings multilíngue"},
            {"id": "xlm-roberta-classify", "object": "model", "owned_by": "joeddav", "description": "Classificação zero-shot multilíngue"},
            {"id": "mt5-summarize", "object": "model", "owned_by": "csebuetnlp", "description": "Sumarização multilíngue"},
            {"id": "distilbert-sentiment", "object": "model", "owned_by": "lxyuan", "description": "Análise de sentimento multilíngue"}
        ]
    }
# Registered on "/health", the path the home page links to.
@app.get("/health")
async def health():
    """Return a fixed "healthy" status with the current time, cache size and model names.

    The status value is a constant; this handler does not probe the local
    model or the remote clients.
    """
    return {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "cache_size": len(response_cache),
        "version": "4.1.0",
        "models": {
            "chat": "LiquidAI/LFM2-8B-A1B-GGUF (Q4)",
            "vision": "google/gemma-3-27b-it",
            "embeddings": "BAAI/bge-m3",
            "classify": "joeddav/xlm-roberta-large-xnli",
            "summarize": "csebuetnlp/mT5_multilingual_XLSum",
            "sentiment": "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
        }
    }
# NOTE(review): no route was visible for this handler; the method and path
# below are an assumption - confirm before relying on them.
@app.post("/v1/cache/clear")
async def clear_cache(request: Request):
    """Empty the response cache (requires a valid API key).

    Returns:
        A confirmation dict with the current timestamp, or a 401 JSON error.
    """
    if not verify_api_key(request):
        return JSONResponse(status_code=401, content={"error": "Invalid API key"})
    # Clear in place instead of rebinding the global, so any other reference
    # to the same dict sees the cache as empty too.
    response_cache.clear()
    return {"message": "Cache cleared", "timestamp": datetime.now().isoformat()}