File size: 1,621 Bytes
f1d1d20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from openai import OpenAI
from typing import List
import os

from .prompts import SYS_PROMPT
from data_ingetion.data import AdvancedDatabase

# Together AI credentials and client configuration.
# SECURITY(review): a live-looking API key was previously hard-coded here as a
# fallback — it has been removed and must be considered compromised: revoke and
# rotate it. The key is now read exclusively from the environment, failing fast
# with an actionable error rather than silently using a leaked credential.
api_key = os.getenv("TOGETHER_API")
if not api_key:
    raise RuntimeError(
        "TOGETHER_API environment variable is not set; "
        "export your Together AI API key before importing this module."
    )
# OpenAI-compatible client pointed at Together's inference endpoint.
client = OpenAI(api_key=api_key, base_url="https://api.together.xyz/v1")
# Embedding model used by get_embedding(); must match what the vector DB
# was populated with.
EMBEDDING_MODEL = "togethercomputer/m2-bert-80M-2k-retrieval"


def generate_response(query: str, group_name: str, return_chunks: bool = True):
    """Answer *query* via retrieval-augmented generation.

    Embeds the query, retrieves matching context chunks for *group_name*
    from the database, and asks the LLM to answer against that context.

    Args:
        query: The user's question.
        group_name: Scope within the database to retrieve context from.
        return_chunks: When True, also return the retrieved context.

    Returns:
        ``(response, context)`` when *return_chunks* is True, otherwise
        just the response text.
    """
    database = AdvancedDatabase()
    embedding = get_embedding(query)
    chunks = database.get_context(embedding, group_name)
    answer = llm_response(chunks, query)
    return (answer, chunks) if return_chunks else answer


def llm_response(context: List[str], user_query: str, history=None, stream: bool = False):
    """Ask the chat model to answer *user_query* grounded in *context*.

    Args:
        context: Retrieved text chunks interpolated into the user message.
        user_query: The user's question.
        history: Optional iterable of prior chat messages (role/content
            dicts), inserted between the system prompt and the new query.
            Defaults to no history. (Fixed: previously a mutable default
            ``{}`` — a shared-across-calls pitfall, and a dict where a
            list of messages is expected.)
        stream: Reserved for a future streaming mode; currently ignored
            and responses are always returned non-streaming.

    Returns:
        The model's reply text.
    """
    messages = [
        {
            "role": "system",
            "content": SYS_PROMPT,
        },
        *(history or ()),
        {
            "role": "user",
            "content": f"Query : {user_query} \n\n Context: {context}",
        },
    ]
    response = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        messages=messages,
        stream=False,
    )
    # TODO(review): honor `stream=True` by returning/yielding delta chunks
    # once a streaming consumer exists; kept non-streaming here to preserve
    # the current return contract (a plain string).
    return response.choices[0].message.content


def get_embedding(query: str):
    """Return the embedding vector for *query* using ``EMBEDDING_MODEL``."""
    result = client.embeddings.create(input=query, model=EMBEDDING_MODEL)
    return result.data[0].embedding