# **
"""Split ./finalreact.txt into paragraphs and embed each one with OpenAI.

The script reads the text file, splits it on the ``splitter`` marker, merges
fragments shorter than ``MIN_PARA_CHARS`` characters into the fragment that
follows them, requests an embedding for every resulting paragraph, and writes
the paragraphs and their embeddings to ``embeddings.csv``.
"""
import json
import os
import re
import time
from io import BytesIO

import backoff
import numpy as np
import openai
import pandas as pd
import requests
import streamlit as st
from openai.error import RateLimitError
from PIL import Image

top_k = 3
splitter = '#--'
# Fragments shorter than this many characters are merged into a neighbour.
MIN_PARA_CHARS = 200

openai.api_key = os.getenv("API_KEY")


def query(payload):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    NOTE(review): ``API_URL`` and ``headers`` are not defined in the visible
    part of this file; calling this function raises ``NameError`` unless they
    are defined elsewhere -- confirm before use.
    """
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()


# Retry with exponential backoff when the API reports a rate limit; after the
# final attempt the RateLimitError propagates to the caller unchanged.
@backoff.on_exception(backoff.expo, RateLimitError, max_tries=8)
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding of ``text`` produced by the OpenAI model ``model``.

    The value returned is the ``'embedding'`` entry of the first item in the
    ``'data'`` list of the ``openai.Embedding.create`` response.
    """
    response = openai.Embedding.create(input=[text], model=model)
    return response['data'][0]['embedding']


def _merge_short_paragraphs(fragments, min_chars=MIN_PARA_CHARS):
    """Merge fragments shorter than ``min_chars`` into the fragment after them.

    Empty fragments are dropped. A short fragment is joined (with a single
    space) to the front of the next fragment, and the combined text is then
    checked against the same rules. A short fragment left over at the very end
    has no successor, so it is appended to the last kept paragraph instead
    (or kept on its own when nothing else was kept).

    Returns a new list; ``fragments`` is not modified.
    """
    merged = []
    pending = None  # short text waiting to be prepended to the next fragment
    for fragment in fragments:
        if pending is not None:
            fragment = pending + ' ' + fragment
            pending = None
        if not fragment:
            continue
        if len(fragment) < min_chars:
            pending = fragment
        else:
            merged.append(fragment)
    if pending is not None:
        # The previous in-place merge indexed one past the end of the list
        # here and raised IndexError; fold the leftover text backwards so no
        # content is lost.
        if merged:
            merged[-1] = merged[-1] + ' ' + pending
        else:
            merged.append(pending)
    return merged


# Context manager guarantees the file handle is closed after reading.
with open("./finalreact.txt", "r") as f:
    text = f.read()

paras = text.split(splitter)
paras_clean = _merge_short_paragraphs(paras)

# 'Prompt Token' and 'Total Tokens' are never filled in by this script; they
# are written to the CSV as empty columns.
df = pd.DataFrame(columns=['Text', 'Embeddings', 'Prompt Token', 'Total Tokens'])
df['Text'] = paras_clean
# One API request per paragraph.
df['Embeddings'] = df['Text'].apply(get_embedding)
df.to_csv("embeddings.csv", index=False)