import os
import secrets
import time

from fastapi import Cookie, FastAPI, Response
from fastapi.responses import HTMLResponse
from pydantic import BaseModel, Field

# Shared secret that gates the /history debug endpoint (see below).
value = os.environ.get('YOUR_ENV_KEY')

app = FastAPI()

# Parallel in-memory session store: Tokens[i] is the session cookie whose
# conversation lives in History[i].
# NOTE(review): both lists grow without bound for the process lifetime —
# a TTL-based store would be needed for production use.
Tokens = []
History = []


@app.get("/", response_class=HTMLResponse)
async def read_root(response: Response):
    """Serve the landing page and start a fresh chat session.

    Mints a new session token, seeds that session's history with the system
    message, and hands the token to the browser as an HttpOnly cookie so that
    /response can locate the right conversation on later turns.
    """
    # Security fix: the original used time.time() as the session token, which
    # is predictable and lets anyone enumerate/hijack other sessions. A
    # cryptographically random token closes that hole; the browser just echoes
    # whatever cookie value it was given, so this is fully compatible.
    token = secrets.token_urlsafe(32)
    Tokens.append(token)
    History.append([{"role": "system", "content": "You are the Prompt Optimization Engine (POE), a tool designed to maximize the clarity and effectiveness of user queries for large language models. Your sole function is to accept a user-submitted prompt, analyze its core intent, context, and desired output format. You must then return a single, improved, streamlined version. The refined prompt must be concise, articulate the required persona and constraints clearly, and fully preserve the original user intent. Always prioritize action-oriented language and remove any conversational filler or ambiguity."}])
    # samesite='none' requires secure=True; both are set so the cookie works
    # from cross-site embeds over HTTPS.
    response.set_cookie(key="token", value=token, httponly=True, secure=True, samesite='none')  # Set cookie
    return ''' Chatbot Assistant
Chat Assistant

This AI model provides information based on pre-existing data and patterns, but may not always offer accurate, up-to-date, or context-specific advice. Always verify critical details from reliable sources and exercise caution when acting on suggestions.

'''


from google import genai
from google.genai import types
import requests

Api_key = os.getenv('API_KEY')
System_instruction = '''**System Prompt for a Programmer-Oriented Coding Assistant:**\n\n> You are a highly focused, fast, and expert-level coding assistant built for professional programmers.\n> Your primary role is **to assist with code writing, debugging, refactoring, optimization, and architecture**.\n> Avoid unnecessary explanations unless asked. Do not teach—**support the user like a senior pair programmer** who assumes context and skill. Prioritize clean, correct, and efficient code.\n\n> Always:\n> * Get straight to the point.\n> * Suggest the most practical and scalable solution.\n> * Respond with complete code blocks when needed.\n> * Use strong defaults and modern conventions.\n> * Assume the user knows what they're doing.\n> * Think ahead: anticipate potential pitfalls or better approaches.\n> * Give fast, minimal answers when asked for quick help.\n\n> Only elaborate if specifically requested (e.g., “explain,” “why,” “teach,” “verbose”)'''

client = genai.Client(api_key=Api_key)


class ChatRequest(BaseModel):
    """Request model for the chat endpoint."""
    prompt: str


def gen(prompt):
    """Send the fully assembled prompt text to the Gemma model and return its reply text."""
    response = client.models.generate_content(
        model="gemma-3-4b-it",
        contents=prompt
    )
    return response.text


@app.post("/response")
async def handle_chat(chat_request: ChatRequest, token: str = Cookie(None)):
    """Answer one chat turn for the session identified by the `token` cookie.

    Appends the user message to the session history, flattens the whole
    conversation into the plain-text turn format the model expects, calls the
    model, stores the reply, and returns it with timing info.
    """
    start = time.time()
    if token not in Tokens:
        # Unknown or missing session cookie: the client must reload "/" first.
        return 'Please stop. Just refresh the page.'
    i = Tokens.index(token)
    History[i].append({"role": "user", "content": chat_request.prompt})
    # Build the prompt: the coding-assistant instruction first, then every
    # stored message. User turns are followed by a "model" marker (the model
    # speaks next); everything else is followed by a "user" marker.
    text = 'system\n' + System_instruction + '\nuser\n'
    for message in History[i]:
        if message['role'] == 'user':
            text = text + message['content'] + '\nmodel\n'
        else:
            text = text + message['content'] + '\nuser\n'
    reply = gen(text)
    History[i].append({"role": "assistant", "content": reply})
    # Bug fix: time.time() already returns seconds, so the original
    # (b-a)/1000 under-reported latency by a factor of 1000.
    elapsed = time.time() - start
    return {"text": reply, "time": elapsed, "t_per_sec": 0}
@app.post("/history")
async def history(chat_request: ChatRequest):
    """Debug/admin dump of every stored conversation.

    The caller must supply the shared secret (env var YOUR_ENV_KEY) in the
    `prompt` field. On a mismatch — or when the env var is unset — the
    endpoint returns None (HTTP 200 with null body), matching the original
    behavior.
    """
    import asyncio
    import secrets

    # compare_digest gives a constant-time comparison so the shared secret
    # can't be recovered via response-timing differences; encoding to bytes
    # keeps it safe for arbitrary unicode input. The explicit None check
    # preserves the original semantics when YOUR_ENV_KEY is not set.
    if value is not None and secrets.compare_digest(chat_request.prompt.encode(), value.encode()):
        # Bug fix: the original time.sleep(10) inside an async handler froze
        # the whole event loop for 10 s; awaiting keeps the throttle delay
        # without blocking every other request.
        await asyncio.sleep(10)
        return History


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)