# app.py — COGNILINE (Hugging Face Space), commit 2b4dbeb
import base64
import logging
import os
import shutil
from typing import Any, Optional

import nest_asyncio
import uvicorn
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import HTMLResponse
from huggingface_hub import InferenceClient
from pydantic import BaseModel

# LlamaIndex imports
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
from llama_index.core.llms.callbacks import llm_completion_callback
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_parse import LlamaParse
# --- LOGGING ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("CognilineCore")
# --- RECUPERAÇÃO DE CHAVES ---
HF_TOKEN = os.getenv("HF_TOKEN")
LLAMA_KEY = os.getenv("LLAMA_KEY")
nest_asyncio.apply()
app = FastAPI()
# --- CONECTOR UNIVERSAL HUGGING FACE (Solução para Erro 404) ---
class HFCustomLLM(CustomLLM):
model_name: str = "google/gemma-1.1-7b-it"
token: str = None
client: Any = None
def __init__(self, model_name: str, token: str, **kwargs: Any):
super().__init__(model_name=model_name, token=token, **kwargs)
self.client = InferenceClient(model=model_name, token=token)
@property
def metadata(self) -> LLMMetadata:
return LLMMetadata(model_name=self.model_name, num_output=512, context_window=4096)
@llm_completion_callback()
def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
# Formata o prompt para o modelo Gemma entender que é um chat
formatted_prompt = f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model"
try:
response_text = self.client.text_generation(
formatted_prompt,
max_new_tokens=512,
temperature=0.1,
do_sample=True
)
return CompletionResponse(text=response_text)
except Exception as e:
raise ValueError(f"Erro na API Hugging Face: {str(e)}")
@llm_completion_callback()
def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
# Sem stream para garantir estabilidade
yield self.complete(prompt, **kwargs)
# --- SISTEMA COGNILINE ---
COGNILINE_PROMPT = (
"Tu és o COGNILINE Sênior. Especialista em Automação Siemens (TIA Portal) e Normas.\n"
"FILOSOFIA: Segurança é Lógica. Priorize NR-10, NR-12 e IEC 61131-3.\n"
"Responde apenas à questão técnica abaixo com base no contexto fornecido.\n\n"
"CONTEXTO:\n{context_str}\n\n"
"PERGUNTA:\n{query_str}\n"
)
class CoreSystem:
def __init__(self):
self.engine = None
self.active = False
def start(self, hf_token, llama_key, data_dir):
try:
logger.info("A ligar os motores...")
os.environ["LLAMA_CLOUD_API_KEY"] = llama_key
# 1. Configura o nosso Conector Universal (Google Gemma)
# Este modelo é muito estável na versão gratuita
Settings.llm = HFCustomLLM(
model_name="google/gemma-1.1-7b-it",
token=hf_token
)
# 2. Configura Embeddings
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
# 3. Processa PDF
parser = LlamaParse(result_type="markdown", language="pt")
file_extractor = {".pdf": parser}
documents = SimpleDirectoryReader(data_dir, file_extractor=file_extractor).load_data()
index = VectorStoreIndex.from_documents(documents)
self.engine = index.as_query_engine(text_qa_template=PromptTemplate(COGNILINE_PROMPT))
self.active = True
logger.info("Sistema Pronto!")
return True, "COGNILINE Conectado via Google Gemma."
except Exception as e:
logger.error(f"Erro: {e}")
return False, str(e)
cogniline = CoreSystem()
@app.post("/api/init")
async def init_api(files: list[UploadFile] = File(...)):
try:
if not HF_TOKEN or not LLAMA_KEY:
return {"status": "error", "message": "Chaves não encontradas nos Settings."}
temp_dir = "/tmp/cogniline_data"
if os.path.exists(temp_dir): shutil.rmtree(temp_dir)
os.makedirs(temp_dir, exist_ok=True)
for f in files:
with open(os.path.join(temp_dir, f.filename), "wb") as buffer:
buffer.write(await f.read())
success, msg = cogniline.start(HF_TOKEN, LLAMA_KEY, temp_dir)
return {"status": "ok" if success else "error", "message": msg}
except Exception as e:
return {"status": "error", "message": str(e)}
class UserQ(BaseModel): query: str
@app.post("/api/ask")
async def ask_api(q: UserQ):
if not cogniline.active: return {"ans": "⚠️ Sistema Offline. Carregue os manuais."}
try:
return {"ans": str(cogniline.engine.query(q.query))}
except Exception as e:
return {"ans": f"Erro de Inferência: {str(e)}"}
# --- FRONTEND (React) ---
HTML = """
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>COGNILINE</title>
<script src="https://unpkg.com/react@18/umd/react.production.min.js"></script>
<script src="https://unpkg.com/react-dom@18/umd/react-dom.production.min.js"></script>
<script src="https://cdn.tailwindcss.com"></script>
<script src="https://unpkg.com/lucide@latest"></script>
<script src="https://unpkg.com/@babel/standalone/babel.min.js"></script>
<style>
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&display=swap');
body { background: #0a0a0a; color: #ccc; font-family: 'JetBrains Mono', monospace; margin:0; }
.btn { transition: 0.2s; } .btn:hover { filter: brightness(1.2); }
</style>
</head>
<body>
<div id="root"></div>
<script type="text/babel">
const { useState, useEffect, useRef } = React;
function App() {
const [status, setStatus] = useState("DESCONECTADO");
const [loading, setLoading] = useState(false);
const [chat, setChat] = useState([]);
const [msg, setMsg] = useState("");
const [files, setFiles] = useState(null);
const end = useRef(null);
useEffect(() => {
lucide.createIcons();
end.current?.scrollIntoView({ behavior: "smooth" });
}, [chat]);
const boot = async () => {
if(!files) return alert("Selecione os manuais PDF.");
setLoading(true);
const fd = new FormData();
for(let f of files) fd.append("files", f);
try {
const r = await fetch("/api/init", { method: "POST", body: fd });
const d = await r.json();
if(d.status === "ok") {
setStatus("ONLINE");
setChat([{ r: 'ai', t: d.message }]);
} else { alert(d.message); }
} catch(e) { alert("Erro de rede."); }
setLoading(false);
};
const send = async () => {
if(!msg.trim() || loading) return;
const t = msg; setMsg("");
setChat(c => [...c, { r: 'u', t: t }]);
setLoading(true);
try {
const r = await fetch("/api/ask", {
method: "POST", headers: { "Content-Type": "application/json" },
body: JSON.stringify({ query: t })
});
const d = await r.json();
setChat(c => [...c, { r: 'ai', t: d.ans }]);
} catch(e) { setChat(c => [...c, { r: 'ai', t: "Erro de comunicação." }]); }
setLoading(false);
};
return (
<div className="flex h-screen">
<aside className="w-80 bg-[#111] border-r border-[#333] p-6 flex flex-col gap-6">
<div className="flex items-center gap-3">
<img src="LOGO_B64" className="h-10 w-10 object-contain" />
<h1 className="text-white font-bold text-xl tracking-tighter">COGNILINE</h1>
</div>
<div className="text-xs font-bold">STATUS: <span className={status==="ONLINE"?"text-green-500":"text-red-500"}>{status}</span></div>
{status !== "ONLINE" ? (
<div className="flex flex-col gap-3 pt-4 border-t border-[#333]">
<label className="text-[10px] text-gray-500">ARQUIVOS PDF (MANUAIS)</label>
<input type="file" multiple onChange={e => setFiles(e.target.files)} className="text-[10px] text-gray-400" />
<button onClick={boot} disabled={loading} className="btn bg-blue-700 p-3 rounded text-white text-xs font-bold flex items-center justify-center gap-2">
{loading ? <i data-lucide="loader-2" className="animate-spin w-4 h-4"></i> : <i data-lucide="power" className="w-4 h-4"></i>}
{loading ? 'CALIBRANDO...' : 'ATIVAR SISTEMA'}
</button>
</div>
) : (
<div className="p-4 bg-green-900/20 border border-green-900 rounded text-[10px] text-green-400">
NÚCLEO OPERACIONAL.<br/>Pronto para análise.
</div>
)}
</aside>
<main className="flex-1 flex flex-col bg-[#050505]">
<div className="flex-1 overflow-y-auto p-8 space-y-6">
{chat.map((m, i) => (
<div key={i} className={`flex ${m.r === 'u' ? 'justify-end' : 'justify-start'}`}>
<div className={`max-w-3xl p-4 rounded border ${m.r === 'u' ? 'border-blue-900 bg-blue-900/10 text-blue-100' : 'border-[#333] bg-[#111] text-gray-300'}`}>
<p className="text-sm leading-relaxed whitespace-pre-wrap">{m.t}</p>
</div>
</div>
))}
<div ref={end} />
</div>
<div className="p-6 border-t border-[#333] bg-[#111] flex gap-4">
<input value={msg} onChange={e => setMsg(e.target.value)} onKeyDown={e => e.key === 'Enter' && send()} disabled={status!=="ONLINE"} className="flex-1 bg-black border border-[#333] p-3 rounded text-sm outline-none focus:border-blue-600" placeholder="Descreva a falha ou solicite código..." />
<button onClick={send} disabled={status!=="ONLINE"} className="btn bg-blue-700 px-6 rounded text-white font-bold text-xs">ENVIAR</button>
</div>
</main>
</div>
);
}
ReactDOM.createRoot(document.getElementById('root')).render(<App />);
</script>
</body>
</html>
"""
@app.get("/", response_class=HTMLResponse)
async def index():
l = ""
if os.path.exists("logo.png"):
with open("logo.png", "rb") as f:
l = f"data:image/png;base64,{base64.b64encode(f.read()).decode()}"
return HTML.replace("LOGO_B64", l)
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=7860)