File size: 1,215 Bytes
5081502
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# **

# Script-level settings.
# NOTE(review): top_k is not referenced anywhere else in this script;
# presumably the number of best matches to retrieve -- confirm before removing.
top_k = 3
# Marker that separates paragraphs in the input text file.
splitter = '#--'

import json 
import streamlit as st
import pandas as pd
import numpy as np
import time 
import os
import openai
import requests
from PIL import Image
from io import BytesIO
import openai, numpy as np
import re
openai.api_key = os.getenv("API_KEY")
from openai.error import RateLimitError
import backoff


def query(payload):
    """POST ``payload`` as a JSON body and return the decoded JSON reply.

    The request goes to the module-level ``API_URL`` using the module-level
    ``headers``.

    NOTE(review): neither ``API_URL`` nor ``headers`` is defined in the
    visible part of this file; both must exist as globals before this
    function is called, otherwise it raises ``NameError``.
    """
    reply = requests.post(API_URL, headers=headers, json=payload)
    decoded = reply.json()
    return decoded


def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding of ``text`` produced by ``openai.Embedding.create``.

    Args:
        text: The string to embed; it is sent as a one-element input list.
        model: Name of the embedding model to request.

    Returns:
        The ``'embedding'`` field of the first entry in the response's
        ``'data'`` list.
    """
    response = openai.Embedding.create(input=[text], model=model)
    first_entry = response['data'][0]
    return first_entry['embedding']
 

# Load the source document and cut it into raw chunks at every ``splitter``
# marker. The ``with`` block guarantees the file handle is closed once the
# text has been read (the bare ``open()`` used before never closed it).
with open("./finalreact.txt", "r") as f:
    text = f.read()
paras = text.split(splitter)

# Merge short paragraphs: every non-empty chunk shorter than ``min_para_len``
# characters is joined (with a single space) onto the chunk that follows it,
# so tiny fragments are never emitted as paragraphs of their own.
min_para_len = 200
paras_clean = []
carry = ''  # short text waiting to be prepended to the next chunk
for para in paras:
    merged = carry + ' ' + para if carry else para
    if not merged:
        continue  # nothing carried and an empty chunk: skip it
    if len(merged) < min_para_len:
        carry = merged  # still too short, keep accumulating
    else:
        paras_clean.append(merged)
        carry = ''

# A short chunk at the very end has no successor to merge into. The previous
# index-based loop wrote to ``paras_temp[i+1]`` and raised IndexError in that
# case (e.g. a trailing newline after the last splitter). Attach real trailing
# text to the last paragraph instead, or keep it on its own when it is the
# only text; whitespace-only leftovers carry no content and are dropped.
if carry.strip():
    if paras_clean:
        paras_clean[-1] = paras_clean[-1] + ' ' + carry
    else:
        paras_clean.append(carry)

# Build the output table: one row per cleaned paragraph, with its embedding.
# 'Prompt Token' and 'Total Tokens' are created as columns but never filled
# in by this script.
embedding_columns = ['Text', 'Embeddings', 'Prompt Token', 'Total Tokens']
df = pd.DataFrame(columns=embedding_columns)
df['Text'] = paras_clean


def _embed_row(row):
    """Return the embedding for the 'Text' field of one DataFrame row."""
    return get_embedding(row['Text'])


# One get_embedding call is made per row.
df['Embeddings'] = df.apply(_embed_row, axis=1)

# Write the table to disk without the index column.
df.to_csv("embeddings.csv", index=False)