Spaces:
Runtime error
Runtime error
# --- Module-level settings ---

# NOTE(review): top_k is not referenced in the visible part of this file;
# presumably the number of best matches to keep -- confirm against its users.
top_k = 3

# Delimiter on which the corpus text is split into paragraphs.
splitter = '#--'
| import json | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import time | |
| import os | |
| import openai | |
| import requests | |
| from PIL import Image | |
| from io import BytesIO | |
| import openai, numpy as np | |
| import re | |
# Configure the OpenAI client from the API_KEY environment variable
# (the value is None when the variable is not set).
openai.api_key = os.environ.get("API_KEY")
| from openai.error import RateLimitError | |
| import backoff | |
def query(payload):
    """Send ``payload`` as a JSON POST request and return the decoded JSON reply.

    The target URL and the request headers are read from the module-level
    names ``API_URL`` and ``headers``.

    NOTE(review): neither ``API_URL`` nor ``headers`` is defined in the
    visible part of this file; calling this function raises ``NameError``
    unless they are defined elsewhere.
    """
    return requests.post(API_URL, headers=headers, json=payload).json()
# Retry with exponential backoff when the API reports a rate limit, so one
# throttled request does not abort the whole embedding run. After 6 attempts
# the RateLimitError is re-raised to the caller.
@backoff.on_exception(backoff.expo, RateLimitError, max_tries=6)
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding of ``text`` produced by ``openai.Embedding.create``.

    Args:
        text: The string to embed; it is sent as a single-element input list.
        model: Name of the embedding model passed to the API.

    Returns:
        The ``'embedding'`` entry of the first item in the response's
        ``'data'`` list.

    Raises:
        RateLimitError: If the request is still rate-limited after all
            retry attempts.
    """
    response = openai.Embedding.create(input=[text], model=model)
    return response['data'][0]['embedding']
# Read the whole corpus file. The context manager closes the handle as soon
# as the text is read, even if reading raises (the original never closed it).
with open("./finalreact.txt", "r") as f:
    text = f.read()

# Split the corpus into raw paragraphs on the module-level delimiter.
paras = text.split(splitter)
# Work on a copy so the raw split result in `paras` stays untouched.
paras_temp = paras.copy()
paras_clean = []

# Merge short paragraphs: a non-empty paragraph shorter than 200 characters
# is prepended (space-separated) to its successor instead of being kept on
# its own; paragraphs of 200+ characters are collected in paras_clean.
last_index = len(paras_temp) - 1
for i, para in enumerate(paras_temp):
    # Replacing paras_temp[i + 1] below does not resize the list, so the
    # iteration sees the merged text on its next step.
    if not para:
        continue
    if len(para) >= 200:
        paras_clean.append(para)
    elif i < last_index:
        paras_temp[i + 1] = para + ' ' + paras_temp[i + 1]
    elif para.strip():
        # A short final paragraph has no successor to merge into (indexing
        # i + 1 here used to raise IndexError). Keep it as its own entry,
        # unless it is only whitespace such as a trailing newline.
        paras_clean.append(para)

# Bare expression kept on purpose: it is a no-op in plain Python, but if this
# file runs as a Streamlit script its "magic" feature renders the value.
len(paras_temp), len(paras_clean)
# Build the output table. 'Prompt Token' and 'Total Tokens' are declared as
# columns but are not filled anywhere in this section.
df = pd.DataFrame(columns=['Text', 'Embeddings', 'Prompt Token', 'Total Tokens'])
df['Text'] = paras_clean

# Embed each paragraph. Applying over the 'Text' column (instead of row-wise
# over the frame) always yields a Series, so the assignment also works when
# paras_clean is empty; a row-wise apply on an empty frame returns a
# DataFrame, which cannot be assigned to a single column.
df['Embeddings'] = df['Text'].apply(get_embedding)

# Persist the paragraphs and their embeddings without the index column.
df.to_csv("embeddings.csv", index=False)