# Redis-AzureAI / app.py
import os
import gradio as gr
import redis
import numpy as np
import json
from openai import AzureOpenAI
from sentence_transformers import SentenceTransformer
# Redis Cloud connection
redis_client = redis.Redis(
host="redis-12628.c14.us-east-1-2.ec2.redns.redis-cloud.com",
port=12628,
decode_responses=True,
username="default",
password=os.getenv("REDIS_PASSWORD")
)
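# decode_responses=True makes hgetall() return str values, which json.loads expects;
# REDIS_PASSWORD must be set in the environment for authentication to succeed.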
# Azure OpenAI client (used only for chat completions; embeddings come from the local model below)
client = AzureOpenAI(
api_key=os.getenv("AZURE_OPENAI_API_KEY").strip(),
api_version="2025-01-01-preview",
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT").strip()
)
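# NOTE: AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT must both be set, or the
# .strip() calls above will raise AttributeError on None at startup.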
CHAT_DEPLOYMENT = "gpt-4.1" # your Azure chat deployment
# 🚀 Better embedding model from HF:
# a good trade-off between quality and performance.
embedder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
# Helper: get embedding from HF
def get_embedding(text):
return embedder.encode(text, convert_to_numpy=True).astype(np.float32)
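# all-mpnet-base-v2 produces 768-dimensional vectors; float32 keeps them compact for caching.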
# Helper: cosine similarity
def cosine_similarity(vec1, vec2):
return float(np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2)))
# Semantic cache lookup: linear scan over every cached entry (O(n) per query).
def search_cache(user_input, threshold=0.8):
query_vec = get_embedding(user_input)
best_key, best_score, best_val = None, -1, None
for key, val in redis_client.hgetall("cache").items():
entry = json.loads(val)
vec = np.array(entry["embedding"], dtype=np.float32)
score = cosine_similarity(query_vec, vec)
if score > best_score:
best_score, best_key, best_val = score, key, entry["output"]
if best_score >= threshold:
return best_val
return None
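
# The linear scan above touches every entry on each lookup. For larger caches,
# Redis Stack's vector search (RediSearch) can run the KNN server-side. The helper
# below is only a sketch under assumptions not in the original app: a Redis Stack
# server, entries written as one hash per prompt under a "cache:" prefix with the
# embedding stored as raw float32 bytes, and an HNSW index named "cache_idx"
# created over that prefix (e.g. via FT.CREATE with DISTANCE_METRIC COSINE).
def search_cache_knn(user_input, threshold=0.8):
    from redis.commands.search.query import Query
    query_bytes = get_embedding(user_input).tobytes()
    q = (
        Query("*=>[KNN 1 @embedding $vec AS dist]")
        .sort_by("dist")
        .return_fields("output", "dist")
        .dialect(2)
    )
    res = redis_client.ft("cache_idx").search(q, query_params={"vec": query_bytes})
    if res.docs:
        doc = res.docs[0]
        # RediSearch returns cosine *distance*; similarity = 1 - distance.
        if 1.0 - float(doc.dist) >= threshold:
            return doc.output
    return None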
def store_cache(user_input, output):
vec = get_embedding(user_input).tolist()
redis_client.hset("cache", user_input, json.dumps({
"embedding": vec,
"output": output
}))
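# NOTE: entries accumulate in the "cache" hash with no eviction; one option is
# storing each entry under its own key with an EXPIRE so stale answers age out.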
def chat_with_ai(user_input):
if not user_input:
return "Please type something."
    # 🔍 Check Redis semantic cache
cached = search_cache(user_input)
if cached:
return f"[From Redis] {cached}"
# Otherwise query Azure OpenAI
response = client.chat.completions.create(
model=CHAT_DEPLOYMENT,
messages=[{"role": "user", "content": user_input}],
temperature=0.8,
max_tokens=700
)
output = response.choices[0].message.content.strip()
    # 💾 Save with embedding in Redis
store_cache(user_input, output)
return f"[From OpenAI] {output}"
# Gradio UI
with gr.Blocks(title="Azure OpenAI + Redis Cloud Chat") as demo:
    gr.Markdown("# 💬 Azure OpenAI + Redis Cloud (Semantic Cache) Demo")
with gr.Row():
chatbot = gr.Chatbot(type="messages")
with gr.Row():
msg = gr.Textbox(placeholder="Type your message here...")
send = gr.Button("Send")
def respond(message, history):
bot_reply = chat_with_ai(message)
history.append({"role": "user", "content": message})
history.append({"role": "assistant", "content": bot_reply})
return history, ""
send.click(respond, [msg, chatbot], [chatbot, msg])
msg.submit(respond, [msg, chatbot], [chatbot, msg])
if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, pwa=True)