Spaces:

MLBench
/

getscenes

Running

getscenes / utils.py

Upload 7 files

9f57d5e verified 27 days ago

1.41 kB

	import openai
	import numpy as np
	import re
	from typing import List, Tuple
	from config import EMBED_MODEL

	def get_embedding(text: str) -> List[float]:
	    """Return the embedding vector for ``text``.

	    Newlines are replaced with spaces and surrounding whitespace is trimmed
	    before the text is submitted, as a single-item batch, through
	    ``openai.embeddings.create`` with the model named by ``EMBED_MODEL``.

	    Args:
	        text: Raw input text.

	    Returns:
	        The ``embedding`` attribute of the first item in the response data.
	    """
	    single_line = " ".join(text.split("\n"))
	    payload = [single_line.strip()]
	    result = openai.embeddings.create(input=payload, model=EMBED_MODEL)
	    first_item = result.data[0]
	    return first_item.embedding

	def cosine_similarity(a: List[float], b: List[float]) -> float:
	    """Calculate cosine similarity between two vectors.

	    Args:
	        a: First vector (any 1-D sequence or array of numbers).
	        b: Second vector, the same length as ``a``.

	    Returns:
	        ``dot(a, b) / (norm(a) * norm(b))`` as a built-in ``float``, or
	        ``0.0`` when either vector has zero magnitude (including empty
	        vectors), since the ratio is undefined there.
	    """
	    # asarray avoids copying inputs that are already float arrays; forcing a
	    # float dtype keeps integer inputs from using integer arithmetic in dot.
	    vec_a = np.asarray(a, dtype=float)
	    vec_b = np.asarray(b, dtype=float)

	    # Each norm is needed for both the zero check and the denominator.
	    norm_a = np.linalg.norm(vec_a)
	    norm_b = np.linalg.norm(vec_b)
	    if norm_a == 0 or norm_b == 0:
	        return 0.0

	    # Convert the NumPy scalar so both branches return the same type.
	    return float(np.dot(vec_a, vec_b) / (norm_a * norm_b))

	def clean_time(time_str: str) -> str:
	    """Clean up a time string.

	    Searches ``time_str`` for the first clock time followed by an AM/PM
	    marker (case-insensitive, optional colon, optional minutes, optional
	    whitespace before the marker) and rewrites it as ``"H:MM AM"`` or
	    ``"H:MM PM"``. Missing minutes default to ``"00"``.

	    Args:
	        time_str: Free-form text that may contain a time; may be empty.

	    Returns:
	        The normalized time when one is found, the stripped input when no
	        time is found, or ``""`` for a falsy input.
	    """
	    if not time_str:
	        return ""

	    # The meridiem group must use a bare ``|`` for alternation; a
	    # backslash-escaped pipe would only match the literal text "AM|PM".
	    time_match = re.search(r'(\d{1,2}):?(\d{0,2})\s*(AM|PM)', time_str, re.IGNORECASE)
	    if time_match is None:
	        return time_str.strip()

	    hour, minute, meridiem = time_match.groups()
	    # The minute group can match an empty string (e.g. "3pm").
	    return f"{hour}:{minute or '00'} {meridiem.upper()}"

	def find_top_k_matches(user_embedding, dataset, k=3):
	    """Find top k matching entries from a dataset.

	    Args:
	        user_embedding: Query vector compared against every entry.
	        dataset: Iterable of ``(entry_id, text, embedding)`` triples.
	        k: Maximum number of matches to return. ``None`` returns every
	            entry; zero or a negative value returns an empty list.

	    Returns:
	        A list of up to ``k`` ``(score, entry_id, text)`` tuples ordered by
	        descending cosine similarity. Entries with equal scores keep their
	        dataset order.
	    """
	    # A negative k would otherwise slice entries off the tail of the ranking.
	    if k is not None and k <= 0:
	        return []

	    scored = [
	        (cosine_similarity(user_embedding, emb), entry_id, text)
	        for entry_id, text, emb in dataset
	    ]
	    # Rank on the score alone: comparing whole tuples would fall through to
	    # entry_id/text on tied scores and raise TypeError for ids that cannot
	    # be ordered (None, dicts, mixed types).
	    scored.sort(key=lambda item: item[0], reverse=True)
	    return scored[:k]