plvictor committed (verified)
Commit 39bc619 · Parent(s): d486fcb

Update app.py

Files changed (1): app.py (+43, -14)
app.py CHANGED
@@ -51,23 +51,28 @@ model_lock = threading.Lock()
 
 def generate_response(message: str, max_tokens: int = 200, temperature: float = 0.7) -> str:
     """Generate a response with the model"""
+    print(f"🔄 Gerando resposta para: '{message[:50]}...'")
+
     try:
         with model_lock:
-            prompt = f"<|system|>\nVocê é um assistente útil. Responda de forma clara e concisa.<|user|>\n{message}<|assistant|>\n"
+            # Simpler, more direct prompt
+            prompt = f"Human: {message}\nAssistant:"
+            print(f"📝 Prompt: {prompt}")
 
             inputs = tokenizer(
                 prompt,
                 return_tensors="pt",
                 truncation=True,
-                max_length=1000,
+                max_length=800,
                 padding=False
             )
+            print(f"🔢 Input tokens: {inputs.input_ids.shape[1]}")
 
             with torch.no_grad():
                 outputs = model.generate(
                     inputs.input_ids,
-                    max_new_tokens=min(max_tokens, 300),
-                    temperature=max(0.1, min(temperature, 1.0)),
+                    max_new_tokens=min(max_tokens, 200),
+                    temperature=max(0.3, min(temperature, 1.0)),
                     do_sample=True,
                     top_p=0.9,
                     repetition_penalty=1.1,
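The new prompt drops the <|system|>/<|user|> template markers in favor of a plain Human/Assistant format, and both sampling knobs are clamped more tightly before reaching model.generate (note the call still passes only inputs.input_ids; passing inputs.attention_mask alongside it is generally recommended). A minimal, model-free sketch of the clamping logic, with an illustrative helper name that is not part of app.py:

def clamp_generation_params(max_tokens: int, temperature: float) -> tuple[int, float]:
    # Mirrors the bounds used above: at most 200 new tokens,
    # temperature forced into the [0.3, 1.0] range.
    return min(max_tokens, 200), max(0.3, min(temperature, 1.0))

assert clamp_generation_params(500, 2.0) == (200, 1.0)
assert clamp_generation_params(50, 0.0) == (50, 0.3)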
@@ -75,18 +80,30 @@ def generate_response(message: str, max_tokens: int = 200, temperature: float =
                     eos_token_id=tokenizer.eos_token_id
                 )
 
+            # Decode the full output first
+            full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            print(f"📄 Resposta completa: {full_response}")
+
+            # Extract only the newly generated part
             response = tokenizer.decode(
                 outputs[0][len(inputs.input_ids[0]):],
                 skip_special_tokens=True
             )
+            print(f"✨ Resposta extraída: '{response}'")
 
             # Clean up the response
-            response = response.split("<|user|>")[0].split("<|system|>")[0].strip()
+            response = response.split("Human:")[0].strip()
+            response = response.replace("\n\n", "\n").strip()
+
+            final_response = response if response else "Não consegui gerar uma resposta válida."
+            print(f"✅ Resposta final: '{final_response}'")
 
-            return response if response else "Não consegui gerar uma resposta."
+            return final_response
 
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Erro na geração: {str(e)}")
+        error_msg = f"Erro na geração: {str(e)}"
+        print(f"❌ {error_msg}")
+        return error_msg
 
 # API endpoints
 
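Decoding only outputs[0][len(inputs.input_ids[0]):] strips the prompt tokens, so response contains just the newly generated text; the string cleanup then guards against the model writing the next "Human:" turn on its own. A tokenizer-free sketch of that cleanup step (the sample text is invented):

def clean_response(raw: str) -> str:
    # Same steps as the commit: cut at a hallucinated next turn,
    # collapse doubled newlines, fall back to an error message.
    text = raw.split("Human:")[0].strip()
    text = text.replace("\n\n", "\n").strip()
    return text if text else "Não consegui gerar uma resposta válida."

print(clean_response("Oi! Como posso ajudar?\nHuman: e agora?"))
# -> Oi! Como posso ajudar?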
@@ -115,38 +132,50 @@ async def health_check():
 @app.post("/chat", response_model=ChatResponse)
 async def chat_endpoint(request: ChatRequest):
     """Main chat endpoint"""
+    print(f"📨 Recebido POST /chat: {request.message}")
+
     if not request.message or not request.message.strip():
         raise HTTPException(status_code=400, detail="Mensagem não pode estar vazia")
 
     try:
-        response = generate_response(
+        response_text = generate_response(
             message=request.message,
             max_tokens=request.max_tokens,
             temperature=request.temperature
         )
 
-        return ChatResponse(response=response)
+        result = ChatResponse(response=response_text)
+        print(f"📤 Enviando resposta: {response_text[:100]}...")
+        return result
 
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        error_msg = f"Erro no endpoint: {str(e)}"
+        print(f"❌ {error_msg}")
+        raise HTTPException(status_code=500, detail=error_msg)
 
 @app.get("/chat")
 async def chat_get(message: str, max_tokens: int = 200, temperature: float = 0.7):
     """GET chat endpoint (simpler to test)"""
+    print(f"📨 Recebido GET /chat: {message}")
+
     if not message or not message.strip():
         raise HTTPException(status_code=400, detail="Parâmetro 'message' é obrigatório")
 
     try:
-        response = generate_response(
+        response_text = generate_response(
             message=message,
             max_tokens=max_tokens,
             temperature=temperature
         )
 
-        return {"response": response, "status": "success"}
+        result = {"response": response_text, "status": "success"}
+        print(f"📤 Enviando resposta GET: {response_text[:100]}...")
+        return result
 
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        error_msg = f"Erro no endpoint GET: {str(e)}"
+        print(f"❌ {error_msg}")
+        raise HTTPException(status_code=500, detail=error_msg)
 
 if __name__ == "__main__":
     print("🚀 Iniciando servidor FastAPI...")
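With the server listening on 0.0.0.0:7860, both routes can be exercised from a short client. A sketch assuming the requests package and a locally running instance (URL and payload values are illustrative):

import requests

BASE = "http://localhost:7860"  # assumed local deployment

# POST /chat takes a JSON body shaped like ChatRequest
r = requests.post(
    f"{BASE}/chat",
    json={"message": "Olá!", "max_tokens": 100, "temperature": 0.7},
)
print(r.json()["response"])

# GET /chat takes query parameters, which is handy for quick tests
r = requests.get(f"{BASE}/chat", params={"message": "Olá!"})
print(r.json())  # {"response": "...", "status": "success"}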
@@ -161,5 +190,5 @@ if __name__ == "__main__":
         app,
         host="0.0.0.0",
         port=7860,
-        log_level="error"  # reduce logging
+        log_level="info"  # show logs for debugging
     )
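One behavioral consequence of the generate_response change above: it now returns the error text instead of raising, so a failed generation reaches clients as HTTP 200 with the error message in the body, and the endpoints' 500 branches fire only for failures outside generation. If a 500 is still wanted in that case, a small guard could restore it; this helper is a hypothetical alternative, not part of the commit:

from fastapi import HTTPException

def ensure_success(response_text: str) -> str:
    # Hypothetical: turn the in-band "Erro na geração:" marker
    # returned by generate_response back into an HTTP 500.
    if response_text.startswith("Erro na geração:"):
        raise HTTPException(status_code=500, detail=response_text)
    return response_text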
 