|
|
import os |
|
|
from typing import List, Tuple |
|
|
from flask import Flask, request, jsonify |
|
|
from google.cloud import vertex_ai |
|
|
|
|
|
|
|
|
# Flask application object; the route handlers below register themselves on it.
app = Flask(__name__)

# Google Cloud settings, read from the environment once at import time.
# `project_id` has no default and is None when the variable is unset.
project_id = os.environ.get("GOOGLE_CLOUD_PROJECT_ID")
location = os.environ.get("GOOGLE_CLOUD_LOCATION", "us-central1")

# Prediction client pointed at the regional API host derived from `location`.
# NOTE(review): confirm that `google.cloud.vertex_ai` really exposes
# PredictionServiceClient in this environment - TODO verify the import path.
vertex_ai_client = vertex_ai.PredictionServiceClient(
    client_options={
        "api_endpoint": f"{location}-aiplatform.googleapis.com",
    },
)
|
|
|
|
|
|
|
|
# Identifier of the deployed endpoint that serves predictions.
# NOTE(review): the original code never defined `endpoint_id`; the environment
# variable name used here was introduced with this fix - confirm it matches the
# deployment configuration.
endpoint_id = os.getenv("GOOGLE_CLOUD_ENDPOINT_ID")

# Fully qualified endpoint resource name passed to `predict()`.
# This must be an f-string: as a plain literal the `{...}` placeholders were
# sent to the API verbatim instead of being filled in.
endpoint = f"projects/{project_id}/locations/{location}/endpoints/{endpoint_id}"

# System prompt placed at the start of every conversation sent to the model.
SYSTEM_MESSAGE = "You are a helpful assistant."
|
|
|
|
|
|
|
|
def generate_response(
    user_input: str,
    history: List[Tuple[str, str]],
    max_tokens: int = 150,
    temperature: float = 0.7,
    top_p: float = 1.0
) -> str:
    """Generate a reply to ``user_input`` through the module-level prediction client.

    The system message, the prior turns from ``history`` and the new user
    message are assembled into one conversation and sent as a single instance.

    Args:
        user_input: The user's latest message.
        history: Earlier turns as (user message, assistant reply) pairs, in
            the order they should appear in the conversation.
        max_tokens: Upper bound on generated tokens; sent to the service as
            ``max_output_tokens``.
        temperature: Sampling temperature forwarded to the service.
        top_p: Nucleus-sampling probability forwarded to the service.

    Returns:
        The ``content`` field of the first prediction. If the service returns
        no predictions, or the first prediction has no ``content``, a fallback
        apology string is returned instead. If anything raises, the error is
        printed and a generic error message is returned; this function never
        propagates an exception.
    """
    fallback = 'Sorry, I couldn’t generate a response.'

    try:
        # Conversation layout: system prompt, alternating past turns, then
        # the new user message last.
        conversation = [{"role": "system", "content": SYSTEM_MESSAGE}]
        for user_message, assistant_message in history:
            conversation.append({"role": "user", "content": user_message})
            conversation.append({"role": "assistant", "content": assistant_message})
        conversation.append({"role": "user", "content": user_input})

        instances = [{"content": conversation}]
        parameters = {
            "temperature": temperature,
            "max_output_tokens": max_tokens,
            "top_p": top_p,
        }

        response = vertex_ai_client.predict(
            endpoint=endpoint,
            instances=instances,
            parameters=parameters,
        )

        # An empty prediction list is "no answer", not an unexpected failure:
        # return the same fallback used for a prediction without content
        # rather than letting predictions[0] raise IndexError.
        predictions = response.predictions
        if not predictions:
            return fallback

        return predictions[0].get('content', fallback)

    except Exception as e:
        # Boundary handler: callers rely on always getting a string back.
        print(f"An error occurred: {e}")
        return "Error: An unexpected error occurred while processing your request."
|
|
|
|
|
|
|
|
@app.route("/chat", methods=["POST"])
def chat():
    """Handle ``POST /chat``: validate the JSON payload and return the model reply.

    Expected request body: a JSON object with two optional keys:
        user_input: string, defaults to ``""``.
        history: list of two-item ``[user message, assistant reply]`` entries,
            defaults to ``[]``.

    Returns:
        ``{"response": <text>}`` on success.
        ``{"error": <message>}`` with status 400 when the payload is missing,
        is not a JSON object, or has fields of the wrong shape.
        ``{"error": <message>}`` with status 500 on an unexpected failure.
    """
    # silent=True yields None instead of raising on a missing or invalid JSON
    # body, so a bad request is reported as 400 rather than as a 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    user_input = payload.get("user_input", "")
    history = payload.get("history", [])

    if not isinstance(user_input, str):
        return jsonify({"error": "'user_input' must be a string."}), 400

    # generate_response unpacks every history entry as a pair, so reject
    # anything that is not a list of two-item entries up front.
    history_is_valid = isinstance(history, list) and all(
        isinstance(turn, (list, tuple)) and len(turn) == 2 for turn in history
    )
    if not history_is_valid:
        return jsonify({"error": "'history' must be a list of [user, assistant] pairs."}), 400

    try:
        response = generate_response(
            user_input=user_input,
            history=history,
        )
    except Exception as e:
        # Keep exception details on the server; do not echo them to the client.
        print(f"An error occurred: {e}")
        return jsonify({"error": "Internal server error."}), 500

    return jsonify({"response": response})
|
|
|
|
|
if __name__ == "__main__":
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Because the server binds
    # to all interfaces, debug is opt-in through FLASK_DEBUG and off by default.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled, host="0.0.0.0", port=5000)
|
|
|