File size: 1,406 Bytes
9f57d5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import openai
import numpy as np
import re
from typing import List, Tuple
from config import EMBED_MODEL

def get_embedding(text: str) -> List[float]:
    """Return the embedding vector for *text* using the configured model.

    Newlines are flattened to spaces and the input is trimmed before the
    API call, since embedding quality degrades on raw newline-heavy text.
    """
    normalized = text.replace("\n", " ").strip()
    result = openai.embeddings.create(input=[normalized], model=EMBED_MODEL)
    return result.data[0].embedding

def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Compute the cosine similarity between two vectors.

    Args:
        a: First vector.
        b: Second vector (same length as ``a``).

    Returns:
        Cosine similarity in [-1.0, 1.0] as a builtin ``float``;
        0.0 if either vector has zero magnitude (including empty vectors).
    """
    a_arr = np.asarray(a, dtype=float)
    b_arr = np.asarray(b, dtype=float)
    # Hoist the norms: the original recomputed np.linalg.norm up to four
    # times (twice in the guard, twice in the return expression).
    norm_a = np.linalg.norm(a_arr)
    norm_b = np.linalg.norm(b_arr)
    if norm_a == 0.0 or norm_b == 0.0:
        # A zero-magnitude vector has no direction; treat as orthogonal.
        return 0.0
    # Cast so callers get a builtin float (matching the annotation)
    # rather than a numpy scalar.
    return float(np.dot(a_arr, b_arr) / (norm_a * norm_b))

def clean_time(time_str: str) -> str:
    """Normalize a free-form time string to ``H:MM AM/PM`` form.

    Args:
        time_str: Raw time text, e.g. ``"3pm"``, ``"10:30 am"``, ``"7:5 PM"``.

    Returns:
        The normalized time (``"3:00 PM"``, ``"10:30 AM"``, ``"7:05 PM"``)
        when a recognizable 12-hour time is found; otherwise the trimmed
        input unchanged. Empty/None-like input yields ``""``.
    """
    if not time_str:
        return ""

    time_match = re.search(r'(\d{1,2}):?(\d{0,2})\s*(AM|PM)', time_str, re.IGNORECASE)
    if time_match:
        hour = time_match.group(1)
        # Bug fix: zero-pad single-digit minutes so "7:5 PM" becomes
        # "7:05 PM" (the original passed "5" through unpadded). A missing
        # minute group still defaults to "00".
        minute = (time_match.group(2) or "0").zfill(2)
        ampm = time_match.group(3).upper()
        return f"{hour}:{minute} {ampm}"

    # No recognizable time pattern: return the trimmed input as-is.
    return time_str.strip()

def find_top_k_matches(user_embedding, dataset, k=3):
    """Return the ``k`` dataset entries most similar to ``user_embedding``.

    Args:
        user_embedding: Embedding vector for the query.
        dataset: Iterable of ``(entry_id, text, embedding)`` triples.
        k: Maximum number of matches to return (default 3).

    Returns:
        A list of up to ``k`` ``(score, entry_id, text)`` tuples, sorted by
        score in descending order (ties keep dataset order).
    """
    scored = [
        (cosine_similarity(user_embedding, emb), entry_id, text)
        for entry_id, text, emb in dataset
    ]
    # Bug fix: sort on the score alone. The original full-tuple sort fell
    # through to comparing entry_id/text on tied scores, which raises
    # TypeError for non-comparable ids (e.g. mixed int/str) and imposed an
    # arbitrary tie-break. A keyed stable sort avoids both.
    scored.sort(key=lambda item: item[0], reverse=True)
    return scored[:k]