Spaces:
Running
Running
Update app.py
#1 by GABASSI - opened
app.py CHANGED
|
@@ -4,12 +4,12 @@ import base64
|
|
| 4 |
import nest_asyncio
|
| 5 |
import shutil
|
| 6 |
import logging
|
| 7 |
-
from typing import Any
|
| 8 |
-
from fastapi import FastAPI, UploadFile, File
|
| 9 |
from pydantic import BaseModel
|
| 10 |
from huggingface_hub import InferenceClient
|
| 11 |
|
| 12 |
-
# LlamaIndex
|
| 13 |
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
|
| 14 |
from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
|
| 15 |
from llama_index.core.llms.callbacks import llm_completion_callback
|
|
@@ -27,7 +27,7 @@ LLAMA_KEY = os.getenv("LLAMA_KEY")
|
|
| 27 |
nest_asyncio.apply()
|
| 28 |
app = FastAPI()
|
| 29 |
|
| 30 |
-
# --- CONECTOR UNIVERSAL HUGGING FACE (
|
| 31 |
class HFCustomLLM(CustomLLM):
|
| 32 |
model_name: str = "google/gemma-1.1-7b-it"
|
| 33 |
token: str = None
|
|
@@ -35,7 +35,6 @@ class HFCustomLLM(CustomLLM):
|
|
| 35 |
|
| 36 |
def __init__(self, model_name: str, token: str, **kwargs: Any):
|
| 37 |
super().__init__(model_name=model_name, token=token, **kwargs)
|
| 38 |
-
# Usa o cliente oficial do Hugging Face que sabe o endereço certo
|
| 39 |
self.client = InferenceClient(model=model_name, token=token)
|
| 40 |
|
| 41 |
@property
|
|
@@ -44,22 +43,22 @@ class HFCustomLLM(CustomLLM):
|
|
| 44 |
|
| 45 |
@llm_completion_callback()
|
| 46 |
def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
|
| 47 |
-
#
|
| 48 |
-
|
| 49 |
try:
|
| 50 |
response_text = self.client.text_generation(
|
| 51 |
-
|
| 52 |
max_new_tokens=512,
|
| 53 |
temperature=0.1,
|
| 54 |
do_sample=True
|
| 55 |
)
|
| 56 |
return CompletionResponse(text=response_text)
|
| 57 |
except Exception as e:
|
| 58 |
-
raise ValueError(f"Erro
|
| 59 |
|
| 60 |
@llm_completion_callback()
|
| 61 |
def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
|
| 62 |
-
#
|
| 63 |
yield self.complete(prompt, **kwargs)
|
| 64 |
|
| 65 |
# --- SISTEMA COGNILINE ---
|
|
@@ -82,7 +81,7 @@ class CoreSystem:
|
|
| 82 |
os.environ["LLAMA_CLOUD_API_KEY"] = llama_key
|
| 83 |
|
| 84 |
# 1. Configura o nosso Conector Universal (Google Gemma)
|
| 85 |
-
# Este modelo é muito estável
|
| 86 |
Settings.llm = HFCustomLLM(
|
| 87 |
model_name="google/gemma-1.1-7b-it",
|
| 88 |
token=hf_token
|
|
@@ -224,7 +223,7 @@ function App() {
|
|
| 224 |
</div>
|
| 225 |
) : (
|
| 226 |
<div className="p-4 bg-green-900/20 border border-green-900 rounded text-[10px] text-green-400">
|
| 227 |
-
|
| 228 |
</div>
|
| 229 |
)}
|
| 230 |
</aside>
|
|
|
|
| 4 |
import nest_asyncio
|
| 5 |
import shutil
|
| 6 |
import logging
|
| 7 |
+
from typing import Any
|
| 8 |
+
from fastapi import FastAPI, UploadFile, File
|
| 9 |
from pydantic import BaseModel
|
| 10 |
from huggingface_hub import InferenceClient
|
| 11 |
|
| 12 |
+
# Imports do LlamaIndex
|
| 13 |
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
|
| 14 |
from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
|
| 15 |
from llama_index.core.llms.callbacks import llm_completion_callback
|
|
|
|
| 27 |
nest_asyncio.apply()
|
| 28 |
app = FastAPI()
|
| 29 |
|
| 30 |
+
# --- CONECTOR UNIVERSAL HUGGING FACE (Solução para Erro 404) ---
|
| 31 |
class HFCustomLLM(CustomLLM):
|
| 32 |
model_name: str = "google/gemma-1.1-7b-it"
|
| 33 |
token: str = None
|
|
|
|
| 35 |
|
| 36 |
def __init__(self, model_name: str, token: str, **kwargs: Any):
|
| 37 |
super().__init__(model_name=model_name, token=token, **kwargs)
|
|
|
|
| 38 |
self.client = InferenceClient(model=model_name, token=token)
|
| 39 |
|
| 40 |
@property
|
|
|
|
| 43 |
|
| 44 |
@llm_completion_callback()
|
| 45 |
def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
|
| 46 |
+
# Formata o prompt para o modelo Gemma entender que é um chat
|
| 47 |
+
formatted_prompt = f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model"
|
| 48 |
try:
|
| 49 |
response_text = self.client.text_generation(
|
| 50 |
+
formatted_prompt,
|
| 51 |
max_new_tokens=512,
|
| 52 |
temperature=0.1,
|
| 53 |
do_sample=True
|
| 54 |
)
|
| 55 |
return CompletionResponse(text=response_text)
|
| 56 |
except Exception as e:
|
| 57 |
+
raise ValueError(f"Erro na API Hugging Face: {str(e)}")
|
| 58 |
|
| 59 |
@llm_completion_callback()
|
| 60 |
def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
|
| 61 |
+
# Sem stream para garantir estabilidade
|
| 62 |
yield self.complete(prompt, **kwargs)
|
| 63 |
|
| 64 |
# --- SISTEMA COGNILINE ---
|
|
|
|
| 81 |
os.environ["LLAMA_CLOUD_API_KEY"] = llama_key
|
| 82 |
|
| 83 |
# 1. Configura o nosso Conector Universal (Google Gemma)
|
| 84 |
+
# Este modelo é muito estável na versão gratuita
|
| 85 |
Settings.llm = HFCustomLLM(
|
| 86 |
model_name="google/gemma-1.1-7b-it",
|
| 87 |
token=hf_token
|
|
|
|
| 223 |
</div>
|
| 224 |
) : (
|
| 225 |
<div className="p-4 bg-green-900/20 border border-green-900 rounded text-[10px] text-green-400">
|
| 226 |
+
NÚCLEO OPERACIONAL.<br/>Pronto para análise.
|
| 227 |
</div>
|
| 228 |
)}
|
| 229 |
</aside>
|