Files changed (1): app.py (+11, -12)
@@ -4,12 +4,12 @@ import base64
  import nest_asyncio
  import shutil
  import logging
- from typing import Any, List, Optional, Dict
- from fastapi import FastAPI, UploadFile, File, Form
  from pydantic import BaseModel
  from huggingface_hub import InferenceClient

- # LlamaIndex Imports
  from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
  from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
  from llama_index.core.llms.callbacks import llm_completion_callback
@@ -27,7 +27,7 @@ LLAMA_KEY = os.getenv("LLAMA_KEY")
  nest_asyncio.apply()
  app = FastAPI()

- # --- CONECTOR UNIVERSAL HUGGING FACE (A SOLUÇÃO DO ERRO 404) ---
  class HFCustomLLM(CustomLLM):
  model_name: str = "google/gemma-1.1-7b-it"
  token: str = None
@@ -35,7 +35,6 @@ class HFCustomLLM(CustomLLM):

  def __init__(self, model_name: str, token: str, **kwargs: Any):
  super().__init__(model_name=model_name, token=token, **kwargs)
- # Usa o cliente oficial do Hugging Face que sabe o endereço certo
  self.client = InferenceClient(model=model_name, token=token)

  @property
@@ -44,22 +43,22 @@ class HFCustomLLM(CustomLLM):

  @llm_completion_callback()
  def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
- # Força o uso do endpoint simples de geração de texto
- full_prompt = f"User: {prompt}\nAssistant:"
  try:
  response_text = self.client.text_generation(
- full_prompt,
  max_new_tokens=512,
  temperature=0.1,
  do_sample=True
  )
  return CompletionResponse(text=response_text)
  except Exception as e:
- raise ValueError(f"Erro no Hugging Face: {str(e)}")

  @llm_completion_callback()
  def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
- # Simplificação: Não usamos stream para evitar erros de rede
  yield self.complete(prompt, **kwargs)

  # --- SISTEMA COGNILINE ---
@@ -82,7 +81,7 @@ class CoreSystem:
  os.environ["LLAMA_CLOUD_API_KEY"] = llama_key

  # 1. Configura o nosso Conector Universal (Google Gemma)
- # Este modelo é muito estável e não costuma dar 404
  Settings.llm = HFCustomLLM(
  model_name="google/gemma-1.1-7b-it",
  token=hf_token
@@ -224,7 +223,7 @@ function App() {
  </div>
  ) : (
  <div className="p-4 bg-green-900/20 border border-green-900 rounded text-[10px] text-green-400">
- SISTEMA OPERACIONAL.<br/>Pronto para análise técnica.
  </div>
  )}
  </aside>
 
  import nest_asyncio
  import shutil
  import logging
+ from typing import Any
+ from fastapi import FastAPI, UploadFile, File
  from pydantic import BaseModel
  from huggingface_hub import InferenceClient

+ # Imports do LlamaIndex
  from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
  from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
  from llama_index.core.llms.callbacks import llm_completion_callback
 
  nest_asyncio.apply()
  app = FastAPI()

+ # --- CONECTOR UNIVERSAL HUGGING FACE (Solução para Erro 404) ---
  class HFCustomLLM(CustomLLM):
  model_name: str = "google/gemma-1.1-7b-it"
  token: str = None
 
  def __init__(self, model_name: str, token: str, **kwargs: Any):
  super().__init__(model_name=model_name, token=token, **kwargs)
  self.client = InferenceClient(model=model_name, token=token)

  @property
 
  @llm_completion_callback()
  def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
+ # Formata o prompt para o modelo Gemma entender que é um chat
+ formatted_prompt = f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model"
  try:
  response_text = self.client.text_generation(
+ formatted_prompt,
  max_new_tokens=512,
  temperature=0.1,
  do_sample=True
  )
  return CompletionResponse(text=response_text)
  except Exception as e:
+ raise ValueError(f"Erro na API Hugging Face: {str(e)}")

  @llm_completion_callback()
  def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
+ # Sem stream para garantir estabilidade
  yield self.complete(prompt, **kwargs)

  # --- SISTEMA COGNILINE ---
 
  os.environ["LLAMA_CLOUD_API_KEY"] = llama_key

  # 1. Configura o nosso Conector Universal (Google Gemma)
+ # Este modelo é muito estável na versão gratuita
  Settings.llm = HFCustomLLM(
  model_name="google/gemma-1.1-7b-it",
  token=hf_token
 
  </div>
  ) : (
  <div className="p-4 bg-green-900/20 border border-green-900 rounded text-[10px] text-green-400">
+ NÚCLEO OPERACIONAL.<br/>Pronto para análise.
  </div>
  )}
  </aside>