# NOTE: removed non-Python scrape residue that preceded the code
# (a "File size" banner, repeated git commit hashes, and a web viewer's
# gutter line numbers 1-103). None of it was part of the program.
import os
import gradio as gr
import redis
import numpy as np
import json
from openai import AzureOpenAI
from sentence_transformers import SentenceTransformer

# Redis Cloud connection.
# decode_responses=True makes hgetall() return str (not bytes), which the
# json.loads calls in search_cache rely on.
redis_client = redis.Redis(
    host="redis-12628.c14.us-east-1-2.ec2.redns.redis-cloud.com",
    port=12628,
    decode_responses=True,
    username="default",
    password=os.getenv("REDIS_PASSWORD")  # None if unset; redis will then fail to AUTH
)

# Azure OpenAI client (only for chat, not embeddings anymore)
# NOTE(review): os.getenv(...) returns None when the variable is unset, so
# .strip() would raise AttributeError at import time — consider os.environ[...]
# for a clearer KeyError, or validate both variables up front.
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY").strip(),
    api_version="2025-01-01-preview",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT").strip()
)

CHAT_DEPLOYMENT = "gpt-4.1"   # your Azure chat deployment

# 🚀 Better embedding model from HF
# Good tradeoff between quality + performance
# Loaded once at import; encode() calls in get_embedding reuse this instance.
embedder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Helper: embed text locally with the sentence-transformers model.
def get_embedding(text):
    """Return the embedding of *text* as a float32 numpy vector."""
    vector = embedder.encode(text, convert_to_numpy=True)
    # float32 keeps the stored JSON payloads small and matches the dtype
    # used when entries are re-loaded in search_cache.
    return vector.astype(np.float32)

# Helper: cosine similarity
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two 1-D numpy vectors as a float.

    Returns 0.0 when either vector has zero norm — the original divided by
    zero there, producing nan (and a runtime warning), which would poison
    the best-score comparison in search_cache.
    """
    denom = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denom == 0.0:
        # A zero vector has no direction; treat it as maximally dissimilar.
        return 0.0
    return float(np.dot(vec1, vec2) / denom)

def search_cache(user_input, threshold=0.8):
    """Semantic lookup in the Redis "cache" hash.

    Embeds *user_input*, scans every cached entry, and returns the stored
    output of the closest entry when its cosine similarity reaches
    *threshold*; returns None on a cache miss.

    NOTE(review): this is a full O(n) scan with one embedding comparison per
    entry — fine for a demo, but Redis vector search (FT.SEARCH / HNSW) would
    scale far better as the cache grows.
    """
    query_vec = get_embedding(user_input)
    best_score, best_output = -1.0, None

    # Keys (the original prompts) are not needed for scoring, so iterate
    # only the JSON values. (The original also tracked an unused best_key.)
    for entry_json in redis_client.hgetall("cache").values():
        entry = json.loads(entry_json)
        vec = np.array(entry["embedding"], dtype=np.float32)
        score = cosine_similarity(query_vec, vec)
        if score > best_score:
            best_score, best_output = score, entry["output"]

    return best_output if best_score >= threshold else None

def store_cache(user_input, output):
    """Cache *output* for *user_input* in the Redis "cache" hash.

    The entry is keyed by the raw prompt text and stores both the answer and
    the prompt's embedding, so later lookups can match semantically.
    """
    payload = {
        "embedding": get_embedding(user_input).tolist(),
        "output": output,
    }
    redis_client.hset("cache", user_input, json.dumps(payload))

def chat_with_ai(user_input):
    """Answer *user_input*, serving from the Redis semantic cache when possible.

    Returns a string prefixed with "[From Redis]" on a cache hit, or
    "[From OpenAI]" after a fresh completion (which is then cached).
    Empty input returns a prompt to type something.
    """
    if not user_input:
        return "Please type something."

    # 🔍 Check Redis semantic cache first.
    # Compare with "is not None", not truthiness: an empty-string cached
    # answer is still a valid hit and should not trigger a re-query.
    cached = search_cache(user_input)
    if cached is not None:
        return f"[From Redis] {cached}"

    # Cache miss — query Azure OpenAI.
    response = client.chat.completions.create(
        model=CHAT_DEPLOYMENT,
        messages=[{"role": "user", "content": user_input}],
        temperature=0.8,
        max_tokens=700
    )
    # content can be None (e.g. when filtered); guard before .strip().
    output = (response.choices[0].message.content or "").strip()

    # 💾 Save with embedding in Redis
    store_cache(user_input, output)

    return f"[From OpenAI] {output}"

# Gradio UI
with gr.Blocks(title="Azure OpenAI + Redis Cloud Chat") as demo:
    gr.Markdown("# πŸ’¬ Azure OpenAI + Redis Cloud (Semantic Cache) Demo")
    with gr.Row():
        chatbot = gr.Chatbot(type="messages")
    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message here...")
        send = gr.Button("Send")

    def respond(message, history):
        bot_reply = chat_with_ai(message)
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": bot_reply})
        return history, ""

    send.click(respond, [msg, chatbot], [chatbot, msg])
    msg.submit(respond, [msg, chatbot], [chatbot, msg])

if __name__ == "__main__":
    # Bind to all interfaces on 7860 (typical container/HF Spaces setup);
    # pwa=True serves the app as an installable progressive web app.
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, pwa=True)