# RAG response generation: embed the query, retrieve matching context, ask the LLM.
from openai import OpenAI
from typing import List
import os
from .prompts import SYS_PROMPT
from data_ingetion.data import AdvancedDatabase
# --- Together AI client configuration ---
# SECURITY: the API key must come from the environment. A previous revision
# hard-coded a fallback key in source; that key should be considered leaked
# and rotated. We now fail fast with a clear message instead.
api_key = os.getenv("TOGETHER_API")
if not api_key:
    raise RuntimeError(
        "TOGETHER_API environment variable is not set; "
        "export your Together AI API key before importing this module."
    )

# OpenAI-compatible client pointed at Together AI's endpoint.
client = OpenAI(api_key=api_key, base_url="https://api.together.xyz/v1")

# Embedding model used by get_embedding() (2k-context retrieval model).
EMBEDDING_MODEL = "togethercomputer/m2-bert-80M-2k-retrieval"
def generate_response(query: str, group_name: str, return_chunks: bool = True):
    """Answer *query* using context retrieved for *group_name*.

    Embeds the query, fetches matching context chunks from the database,
    and asks the LLM to answer grounded in them.

    Returns ``(answer, context_chunks)`` when ``return_chunks`` is True,
    otherwise just the answer string.
    """
    database = AdvancedDatabase()
    embedding = get_embedding(query)
    retrieved = database.get_context(embedding, group_name)
    answer = llm_response(retrieved, query)
    return (answer, retrieved) if return_chunks else answer
def llm_response(context: List[str], user_query: str, history=None, stream: bool = False):
    """Ask the chat model to answer *user_query* grounded in *context*.

    Parameters
    ----------
    context : List[str]
        Retrieved context chunks, interpolated into the user message.
    user_query : str
        The user's question.
    history : iterable of message dicts, optional
        Prior chat messages inserted between the system prompt and the new
        user message. Defaults to no history. (Previously a mutable ``{}``
        default — besides the shared-state pitfall, unpacking a dict with
        ``*`` yields its keys, not message dicts, so ``None`` is the
        correct sentinel here.)
    stream : bool
        Currently unimplemented; the request is always non-streaming.
        TODO: when True, yield ``chunk.choices[0].delta.content`` chunks.

    Returns
    -------
    str
        The model's reply text.
    """
    messages = [
        {
            "role": "system",
            "content": SYS_PROMPT,
        },
        *(history or ()),
        {
            "role": "user",
            "content": f"Query : {user_query} \n\n Context: {context}",
        },
    ]
    response = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        messages=messages,
        stream=False,
    )
    return response.choices[0].message.content
def get_embedding(query: str):
    """Return the embedding vector for *query* using ``EMBEDDING_MODEL``."""
    result = client.embeddings.create(input=query, model=EMBEDDING_MODEL)
    return result.data[0].embedding