Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import redis | |
| import numpy as np | |
| import json | |
| from openai import AzureOpenAI | |
| from sentence_transformers import SentenceTransformer | |
# Redis Cloud connection.
# decode_responses=True means every value read back from Redis is str, not
# bytes — json.loads in search_cache relies on this.
redis_client = redis.Redis(
    host="redis-12628.c14.us-east-1-2.ec2.redns.redis-cloud.com",
    port=12628,
    decode_responses=True,
    username="default",
    password=os.getenv("REDIS_PASSWORD")
)

# Azure OpenAI client (only for chat, not embeddings anymore).
# NOTE(review): os.getenv(...) returns None when the variable is unset, so
# .strip() would raise AttributeError at import time — confirm both env vars
# are always configured in the deployment environment.
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY").strip(),
    api_version="2025-01-01-preview",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT").strip()
)

CHAT_DEPLOYMENT = "gpt-4.1"  # your Azure chat deployment name

# Embedding model from HF (all-mpnet-base-v2): good quality/performance
# tradeoff. Downloaded/loaded at import time — first start is slow.
embedder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
# Helper: get embedding from the local HF model
def get_embedding(text):
    """Embed *text* with the sentence-transformers model.

    Returns a 1-D numpy array cast to float32 (the dtype search_cache
    stores and compares against).
    """
    vector = embedder.encode(text, convert_to_numpy=True)
    return vector.astype(np.float32)
# Helper: cosine similarity
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two 1-D numpy vectors as a float.

    Fix: the original divided by the product of the norms unconditionally,
    yielding NaN (with a runtime warning) whenever either vector is all
    zeros. Cosine similarity is undefined there; return 0.0 instead so
    search_cache treats such an entry as "no match".
    """
    denom = float(np.linalg.norm(vec1)) * float(np.linalg.norm(vec2))
    if denom == 0.0:
        return 0.0
    return float(np.dot(vec1, vec2) / denom)
def search_cache(user_input, threshold=0.8):
    """Semantic lookup in the Redis "cache" hash.

    Embeds *user_input*, scans every cached entry, and returns the stored
    output of the most similar prior query if its cosine similarity is at
    least *threshold*; otherwise returns None (including when the cache
    is empty — hgetall returns {} then and the loop is a no-op).

    Fixes vs. original: drops the never-used `best_key` local and iterates
    .values() instead of .items() since the hash field name is never read.

    NOTE(review): this is a full O(n) scan that JSON-decodes every entry
    per query — fine for a demo; a RediSearch vector index would be the
    production approach.
    """
    query_vec = get_embedding(user_input)
    best_score = -1.0
    best_output = None
    for entry_json in redis_client.hgetall("cache").values():
        entry = json.loads(entry_json)
        cached_vec = np.array(entry["embedding"], dtype=np.float32)
        score = cosine_similarity(query_vec, cached_vec)
        if score > best_score:
            best_score = score
            best_output = entry["output"]
    return best_output if best_score >= threshold else None
def store_cache(user_input, output):
    """Persist a (query, answer) pair in the Redis "cache" hash.

    The hash field is the raw user input; the value is a JSON document
    holding the query embedding (as a plain Python list, since float32
    arrays are not JSON-serializable) plus the model output.
    """
    payload = {
        "embedding": get_embedding(user_input).tolist(),
        "output": output
    }
    redis_client.hset("cache", user_input, json.dumps(payload))
def chat_with_ai(user_input):
    """Answer *user_input*, serving from the Redis semantic cache when possible.

    Returns a string prefixed "[From Redis] " on a cache hit, or
    "[From OpenAI] " after a fresh Azure OpenAI call whose result is then
    written back to the cache.

    Fix: the original guard `if not user_input` let whitespace-only input
    (e.g. "   ") through to the embedder and the API; reject it here too.
    """
    if not user_input or not user_input.strip():
        return "Please type something."

    # Check the Redis semantic cache first.
    cached = search_cache(user_input)
    if cached:
        return f"[From Redis] {cached}"

    # Cache miss: query Azure OpenAI.
    response = client.chat.completions.create(
        model=CHAT_DEPLOYMENT,
        messages=[{"role": "user", "content": user_input}],
        temperature=0.8,
        max_tokens=700
    )
    output = response.choices[0].message.content.strip()

    # Save with embedding in Redis so similar future queries hit the cache.
    store_cache(user_input, output)
    return f"[From OpenAI] {output}"
# Gradio UI: a chat window, a textbox, and a Send button wired to the
# semantic-cache backend above.
with gr.Blocks(title="Azure OpenAI + Redis Cloud Chat") as demo:
    gr.Markdown("# π¬ Azure OpenAI + Redis Cloud (Semantic Cache) Demo")
    with gr.Row():
        # type="messages": history is a list of {"role", "content"} dicts.
        chatbot = gr.Chatbot(type="messages")
    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message here...")
        send = gr.Button("Send")

    def respond(message, history):
        # Ask the (cached) backend, append both turns to the transcript,
        # and return "" as the second output to clear the textbox.
        bot_reply = chat_with_ai(message)
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": bot_reply})
        return history, ""

    # Both clicking Send and pressing Enter in the textbox submit a message.
    send.click(respond, [msg, chatbot], [chatbot, msg])
    msg.submit(respond, [msg, chatbot], [chatbot, msg])

if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard Hugging Face Spaces binding;
    # pwa=True serves an installable-web-app manifest.
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, pwa=True)