Update app.py
app.py CHANGED
@@ -51,23 +51,28 @@ model_lock = threading.Lock()
 
 def generate_response(message: str, max_tokens: int = 200, temperature: float = 0.7) -> str:
     """Gerar resposta com o modelo"""
+    print(f"🔄 Gerando resposta para: '{message[:50]}...'")
+
     try:
         with model_lock:
-
+            # Prompt mais simples e direto
+            prompt = f"Human: {message}\nAssistant:"
+            print(f"📝 Prompt: {prompt}")
 
             inputs = tokenizer(
                 prompt,
                 return_tensors="pt",
                 truncation=True,
-                max_length=
+                max_length=800,
                 padding=False
             )
+            print(f"🔢 Input tokens: {inputs.input_ids.shape[1]}")
 
             with torch.no_grad():
                 outputs = model.generate(
                     inputs.input_ids,
-                    max_new_tokens=min(max_tokens,
-                    temperature=max(0.
+                    max_new_tokens=min(max_tokens, 200),
+                    temperature=max(0.3, min(temperature, 1.0)),
                     do_sample=True,
                     top_p=0.9,
                     repetition_penalty=1.1,
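
The two clamps added above keep the request values inside fixed bounds before they reach model.generate: max_tokens is capped at 200 and temperature is forced into the range 0.3 to 1.0. A quick standalone check of that arithmetic, with invented request values:

# Same clamping as in the generate() call above; the sample inputs are illustrative
for requested_tokens, requested_temp in [(50, 0.1), (500, 0.7), (200, 1.5)]:
    max_new_tokens = min(requested_tokens, 200)
    temperature = max(0.3, min(requested_temp, 1.0))
    print(requested_tokens, requested_temp, "->", max_new_tokens, temperature)
# 50 0.1 -> 50 0.3
# 500 0.7 -> 200 0.7
# 200 1.5 -> 200 1.0
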
@@ -75,18 +80,30 @@ def generate_response(message: str, max_tokens: int = 200, temperature: float = 0.7) -> str:
                     eos_token_id=tokenizer.eos_token_id
                 )
 
+            # Decodificar resposta completa primeiro
+            full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            print(f"📄 Resposta completa: {full_response}")
+
+            # Extrair apenas a parte nova
             response = tokenizer.decode(
                 outputs[0][len(inputs.input_ids[0]):],
                 skip_special_tokens=True
             )
+            print(f"✨ Resposta extraída: '{response}'")
 
             # Limpar resposta
-            response = response.split("
+            response = response.split("Human:")[0].strip()
+            response = response.replace("\n\n", "\n").strip()
+
+            final_response = response if response else "Não consegui gerar uma resposta válida."
+            print(f"✅ Resposta final: '{final_response}'")
 
-            return
+            return final_response
 
     except Exception as e:
-
+        error_msg = f"Erro na geração: {str(e)}"
+        print(f"❌ {error_msg}")
+        return error_msg
 
 # Endpoints da API
 
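
The cleanup added above assumes the model may keep going and open a new "Human:" turn after its answer, so everything from the first "Human:" onward is discarded. A minimal sketch of that extraction step in isolation (no model needed; the raw string is invented):

# Invented example of the text decoded after the prompt tokens
raw = "O Brasil tem 26 estados e um Distrito Federal.\nHuman: e quantas capitais?"

# Same cleanup as in generate_response: keep only the assistant's turn
response = raw.split("Human:")[0].strip()
response = response.replace("\n\n", "\n").strip()
print(response)  # O Brasil tem 26 estados e um Distrito Federal.
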
@@ -115,38 +132,50 @@ async def health_check():
 @app.post("/chat", response_model=ChatResponse)
 async def chat_endpoint(request: ChatRequest):
     """Endpoint principal para chat"""
+    print(f"📨 Recebido POST /chat: {request.message}")
+
     if not request.message or not request.message.strip():
         raise HTTPException(status_code=400, detail="Mensagem não pode estar vazia")
 
     try:
-
+        response_text = generate_response(
             message=request.message,
             max_tokens=request.max_tokens,
             temperature=request.temperature
         )
 
-
+        result = ChatResponse(response=response_text)
+        print(f"📤 Enviando resposta: {response_text[:100]}...")
+        return result
 
     except Exception as e:
-
+        error_msg = f"Erro no endpoint: {str(e)}"
+        print(f"❌ {error_msg}")
+        raise HTTPException(status_code=500, detail=error_msg)
 
 @app.get("/chat")
 async def chat_get(message: str, max_tokens: int = 200, temperature: float = 0.7):
     """Endpoint GET para chat (mais simples de testar)"""
+    print(f"📨 Recebido GET /chat: {message}")
+
     if not message or not message.strip():
         raise HTTPException(status_code=400, detail="Parâmetro 'message' é obrigatório")
 
     try:
-
+        response_text = generate_response(
             message=message,
             max_tokens=max_tokens,
             temperature=temperature
         )
 
-
+        result = {"response": response_text, "status": "success"}
+        print(f"📤 Enviando resposta GET: {response_text[:100]}...")
+        return result
 
     except Exception as e:
-
+        error_msg = f"Erro no endpoint GET: {str(e)}"
+        print(f"❌ {error_msg}")
+        raise HTTPException(status_code=500, detail=error_msg)
 
 if __name__ == "__main__":
     print("🚀 Iniciando servidor FastAPI...")
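
Both endpoints rely on the ChatRequest and ChatResponse Pydantic models, which are defined elsewhere in app.py and not shown in this diff. A plausible shape consistent with the fields used above; only the field names come from the code, the defaults are assumptions:

from pydantic import BaseModel

# Hypothetical reconstruction: message/max_tokens/temperature and response are
# implied by the endpoints above, the default values are guesses
class ChatRequest(BaseModel):
    message: str
    max_tokens: int = 200
    temperature: float = 0.7

class ChatResponse(BaseModel):
    response: str
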
@@ -161,5 +190,5 @@ if __name__ == "__main__":
         app,
         host="0.0.0.0",
         port=7860,
-        log_level="
+        log_level="info" # Mostrar logs para debug
     )
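
With the server listening on port 7860 as configured above, both chat endpoints can be exercised from a small client script. This sketch assumes the requests package and a locally running instance; the base URL is illustrative:

import requests

BASE = "http://localhost:7860"  # illustrative; use the Space's URL in practice

# POST /chat with a JSON body (fields mirror ChatRequest)
r = requests.post(f"{BASE}/chat", json={"message": "Olá, tudo bem?", "max_tokens": 100, "temperature": 0.7})
print(r.status_code, r.json())  # expected: 200 {"response": "..."}

# GET /chat with query parameters (simpler to test in a browser)
r = requests.get(f"{BASE}/chat", params={"message": "Olá!", "max_tokens": 50})
print(r.status_code, r.json())  # expected: 200 {"response": "...", "status": "success"}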
|