Update app.py
app.py CHANGED
@@ -47,10 +47,58 @@ for i, row in tqdm(df.iterrows(), total=df.shape[0]):
         index.upsert(prepped)
         prepped = []

+#Use ada embedding model to create vector embeddings for input articles.
+def get_embeddings(articles, model="text-embedding-ada-002"):
+    return openai_client.embeddings.create(input = articles, model=model)

-def
-
+def create_prompt(query):
+    #Create embedding for input query.
+    embed = get_embeddings([query])
+
+    #Search match in Pinecone.
+    res = index.query(vector=embed.data[0].embedding, top_k=3, include_metadata=True)
+
+    #Extract Metadata and Text from the matches object returned by Pinecone.
+    contexts = [
+        x['metadata']['text'] for x in res['matches']
+    ]
+
+    prompt_start = (
+        "Answer the question based on the context below.\n\n"+
+        "Context:\n"
+    )
+
+    prompt_end = (
+        f"\n\nQuestion: {query}\nAnswer:"
+    )
+
+    #Create a Prompt for OpenAI and provide results of pinecone as part of the prompt.
+    prompt = (
+        prompt_start + "\n\n---\n\n".join(contexts) +
+        prompt_end
+    )
+    return prompt
+
+def summarize(prompt):
+    #Use completions API with GPT 3.5 Turbo, and prepared prompt to call OpenAI.
+    res = openai_client.completions.create(
+        model="gpt-3.5-turbo-instruct",
+        prompt=prompt,
+        temperature=0,
+        max_tokens=636,
+        top_p=1,
+        frequency_penalty=0,
+        presence_penalty=0,
+        stop=None
+    )

+    return res.choices[0].text
+
+
+def run_query(query):
+    prompt=create_prompt(query)
+    result=summarize(prompt)
+    return result

 # Create the Gradio interface
 demo = gr.Interface(fn=run_query, inputs=gr.Textbox(label="User Input", placeholder="Type your question here..."), outputs=gr.Textbox(label="Matching Questions from Vector Database"))
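Note: the hunk above references names that are defined earlier in app.py and are not part of this change: openai_client (an OpenAI client), index (a Pinecone index), gr (Gradio), and the dataframe loop that batches rows into prepped and upserts them. A minimal sketch of that surrounding setup follows; the environment variable names, the index name, and the example query are illustrative assumptions, not code from this commit.

import os

import gradio as gr                    # gr.Interface is used at the bottom of app.py
from openai import OpenAI
from pinecone import Pinecone

# Clients assumed by get_embeddings, create_prompt and summarize (assumed env var names).
openai_client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index = pc.Index("example-index")      # hypothetical index name

# ... earlier app.py code embeds the dataframe rows, builds prepped batches,
# and calls index.upsert(prepped) as shown in the hunk's context lines ...

# With the functions from this commit in place, the pipeline can be exercised directly:
# answer = run_query("What is a vector database?")   # example query, made up

The Space then serves run_query through the gr.Interface declared at the end of the file, typically by calling demo.launch().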