Spaces:
Sleeping
Sleeping
Blue2962 committed on
Commit ·
4d2d127
1
Parent(s): e62c664
- main.py +9 -1
- pinecone.py +29 -0
- requirements.txt +2 -1
main.py
CHANGED
|
@@ -6,6 +6,7 @@ import streamlit as st
|
|
| 6 |
from pathlib import Path
|
| 7 |
import tempfile
|
| 8 |
import uuid
|
|
|
|
| 9 |
|
| 10 |
load_dotenv()
|
| 11 |
openai_api_key = os.environ["OPENAI_API_KEY"]
|
|
@@ -18,7 +19,14 @@ if "messages" not in st.session_state:
|
|
| 18 |
st.session_state.messages = []
|
| 19 |
|
| 20 |
def response(user_input):
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
for msg in st.session_state.messages:
|
| 23 |
messages.append({"role": msg["role"], "content": msg["content"]})
|
| 24 |
|
|
|
|
| 6 |
from pathlib import Path
|
| 7 |
import tempfile
|
| 8 |
import uuid
|
| 9 |
+
from pinecone import query_text
|
| 10 |
|
| 11 |
load_dotenv()
|
| 12 |
openai_api_key = os.environ["OPENAI_API_KEY"]
|
|
|
|
| 19 |
st.session_state.messages = []
|
| 20 |
|
| 21 |
def response(user_input):
|
| 22 |
+
pinecone_results = query_text(user_input, top_k=3)
|
| 23 |
+
contexts = [m['metadata']['text'] for m in pinecone_results['matches']]
|
| 24 |
+
context_text = "\n".join(contexts)
|
| 25 |
+
|
| 26 |
+
messages = [{
|
| 27 |
+
"role": "system",
|
| 28 |
+
"content": f"""あなたは安田章紀(やすだあきのり)博士です。京都大学に所属していて、チベット仏教を専門にしています。生前の情報:{context_text}この情報を参考に日本語で答えてください。"""},]
|
| 29 |
+
|
| 30 |
for msg in st.session_state.messages:
|
| 31 |
messages.append({"role": msg["role"], "content": msg["content"]})
|
| 32 |
|
pinecone.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Module-level setup: Pinecone vector index and OpenAI embeddings client.
# NOTE(review): this file is named pinecone.py, so `import pinecone` will
# resolve to this module itself rather than the pinecone-client package
# whenever the app directory is first on sys.path — `pinecone.init` would
# then fail with AttributeError. Rename the file (e.g. vector_store.py)
# to stop shadowing the third-party package; callers in main.py
# (`from pinecone import query_text`) must be updated to match.
import pinecone
import os
from openai import OpenAI

# NOTE(review): pinecone.init(...) / pinecone.Index(...) is the legacy
# (pre-3.x) pinecone-client API; newer releases use
# `Pinecone(api_key=...).Index(...)` — confirm against the pinned
# pinecone-client version in requirements.txt.
pinecone.init(api_key=os.environ["PINECONE_API_KEY"])
index = pinecone.Index("your-index")  # placeholder name — TODO set the real index

# OpenAI client used for embedding both upserted texts and queries.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
|
| 9 |
+
|
| 10 |
+
def upsert_texts(texts, ids):
    """Embed *texts* with OpenAI and upsert them into the Pinecone index.

    Args:
        texts: iterable of strings to embed.
        ids: iterable of vector ids, parallel to *texts*.
    """
    texts = list(texts)
    if not texts:
        # Nothing to embed — avoid an invalid empty API request.
        return
    # One batched embeddings request instead of one HTTP round trip per
    # text: the embeddings endpoint accepts a list input and returns one
    # item per element, so the result is identical but far faster.
    response = client.embeddings.create(
        model="text-embedding-3-large",
        input=texts,
    )
    # Sort by the returned index to guarantee ordering matches the input.
    ordered = sorted(response.data, key=lambda item: item.index)
    embeddings = [item.embedding for item in ordered]
    # `vec_id` instead of `id` — don't shadow the builtin.
    vectors = [(vec_id, emb) for vec_id, emb in zip(ids, embeddings)]
    index.upsert(vectors)
|
| 21 |
+
|
| 22 |
+
def query_text(query, top_k=5):
    """Return the *top_k* nearest matches in the Pinecone index for *query*.

    The query string is embedded with the same model used for upserts, and
    the raw Pinecone query response (with metadata) is returned.
    """
    embedding_response = client.embeddings.create(
        model="text-embedding-3-large",
        input=query,
    )
    query_vector = embedding_response.data[0].embedding
    return index.query(vector=query_vector, top_k=top_k, include_metadata=True)
|
requirements.txt
CHANGED
|
@@ -2,4 +2,5 @@ streamlit
|
|
| 2 |
openai
|
| 3 |
requests
|
| 4 |
python-dotenv
|
| 5 |
-
|
|
|
|
|
|
| 2 |
openai
|
| 3 |
requests
|
| 4 |
python-dotenv
|
| 5 |
+
pinecone-client
|
| 6 |
+
elevenlabs
|