# Hugging Face Spaces page-header residue (commented out so this file parses as Python):
# Spaces: Sleeping
# File size: 3,265 Bytes
import os
import gradio as gr
import redis
import numpy as np
import json
from openai import AzureOpenAI
from sentence_transformers import SentenceTransformer
# Redis Cloud connection
# NOTE(review): host/port are hard-coded to one Redis Cloud instance; consider env vars.
redis_client = redis.Redis(
    host="redis-12628.c14.us-east-1-2.ec2.redns.redis-cloud.com",
    port=12628,
    decode_responses=True,  # commands return str instead of bytes
    username="default",
    password=os.getenv("REDIS_PASSWORD")  # None if unset — auth fails on first command
)
# Azure OpenAI client (only for chat, not embeddings anymore)
# .strip() removes stray whitespace/newlines often pasted into env values;
# NOTE(review): raises AttributeError at import time if either env var is unset.
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY").strip(),
    api_version="2025-01-01-preview",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT").strip()
)
CHAT_DEPLOYMENT = "gpt-4.1"  # your Azure chat deployment name
# Embedding model from Hugging Face — good quality/performance tradeoff.
# Loaded once at import; downloads model weights on first run.
embedder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
# Helper: get embedding from HF
def get_embedding(text):
    """Embed *text* with the module-level SentenceTransformer, as a float32 numpy array."""
    vector = embedder.encode(text, convert_to_numpy=True)
    return vector.astype(np.float32)
# Helper: cosine similarity
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two 1-D vectors as a Python float.

    Guards against a zero-norm vector: returns 0.0 instead of dividing by
    zero (which would produce nan and a numpy RuntimeWarning).
    """
    denom = float(np.linalg.norm(vec1)) * float(np.linalg.norm(vec2))
    if denom == 0.0:
        return 0.0
    return float(np.dot(vec1, vec2) / denom)
def search_cache(user_input, threshold=0.8):
    """Semantic cache lookup against the Redis hash "cache".

    Embeds *user_input* and linearly scans every cached entry (O(n) in cache
    size — fine for a demo, not for a large cache), keeping the best cosine
    score.

    Args:
        user_input: The new prompt to look up.
        threshold: Minimum cosine similarity to count as a cache hit.

    Returns:
        The cached response string of the best match if its score is
        >= threshold, otherwise None (cache miss, including empty cache).
    """
    query_vec = get_embedding(user_input)
    best_score, best_output = -1.0, None
    # Only the stored JSON values are needed, so iterate .values(), not .items().
    for entry_json in redis_client.hgetall("cache").values():
        entry = json.loads(entry_json)
        vec = np.array(entry["embedding"], dtype=np.float32)
        score = cosine_similarity(query_vec, vec)
        if score > best_score:
            best_score, best_output = score, entry["output"]
    return best_output if best_score >= threshold else None
def store_cache(user_input, output):
    """Persist a prompt/response pair, plus the prompt's embedding, in the Redis hash "cache"."""
    payload = {
        "embedding": get_embedding(user_input).tolist(),
        "output": output,
    }
    redis_client.hset("cache", user_input, json.dumps(payload))
def chat_with_ai(user_input):
    """Answer *user_input*, preferring the Redis semantic cache over Azure OpenAI.

    Returns a reply string prefixed with its origin ("[From Redis]" or
    "[From OpenAI]"), or a prompt to type something when input is empty.
    New Azure responses are written back to the cache with their embedding.
    """
    if not user_input:
        return "Please type something."
    # Check the Redis semantic cache first.
    cached = search_cache(user_input)
    if cached:
        return f"[From Redis] {cached}"
    # Cache miss: query Azure OpenAI.
    response = client.chat.completions.create(
        model=CHAT_DEPLOYMENT,
        messages=[{"role": "user", "content": user_input}],
        temperature=0.8,
        max_tokens=700
    )
    # message.content may be None (e.g. content filtering) — avoid AttributeError on .strip().
    output = (response.choices[0].message.content or "").strip()
    # Save with embedding in Redis for future semantic hits.
    store_cache(user_input, output)
    return f"[From OpenAI] {output}"
# Gradio UI: a chat window with a textbox and a Send button.
with gr.Blocks(title="Azure OpenAI + Redis Cloud Chat") as demo:
    gr.Markdown("# π¬ Azure OpenAI + Redis Cloud (Semantic Cache) Demo")
    with gr.Row():
        # History uses the openai-style "messages" format (role/content dicts).
        chatbot = gr.Chatbot(type="messages")
    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message here...")
        send = gr.Button("Send")
    def respond(message, history):
        # Event handler: fetch the reply, append both turns, clear the textbox.
        bot_reply = chat_with_ai(message)
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": bot_reply})
        return history, ""
    # Both clicking Send and pressing Enter in the textbox submit the message.
    send.click(respond, [msg, chatbot], [chatbot, msg])
    msg.submit(respond, [msg, chatbot], [chatbot, msg])
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (container/Spaces friendly); PWA mode enabled.
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, pwa=True)