Blue2962 committed on
Commit
4d2d127
·
1 Parent(s): e62c664
Files changed (3) hide show
  1. main.py +9 -1
  2. pinecone.py +29 -0
  3. requirements.txt +2 -1
main.py CHANGED
@@ -6,6 +6,7 @@ import streamlit as st
6
  from pathlib import Path
7
  import tempfile
8
  import uuid
 
9
 
10
  load_dotenv()
11
  openai_api_key = os.environ["OPENAI_API_KEY"]
@@ -18,7 +19,14 @@ if "messages" not in st.session_state:
18
  st.session_state.messages = []
19
 
20
  def response(user_input):
21
- messages = [{"role": "system", "content": "あなたは安田章紀(やすだあきのり)博士です。京都大学に所属していて、チベット仏教を専門にしています。日本語で答えてください。"},]
 
 
 
 
 
 
 
22
  for msg in st.session_state.messages:
23
  messages.append({"role": msg["role"], "content": msg["content"]})
24
 
 
6
  from pathlib import Path
7
  import tempfile
8
  import uuid
9
+ from pinecone import query_text
10
 
11
  load_dotenv()
12
  openai_api_key = os.environ["OPENAI_API_KEY"]
 
19
  st.session_state.messages = []
20
 
21
  def response(user_input):
22
+ pinecone_results = query_text(user_input, top_k=3)
23
+ contexts = [m['metadata']['text'] for m in pinecone_results['matches']]
24
+ context_text = "\n".join(contexts)
25
+
26
+ messages = [{
27
+ "role": "system",
28
+ "content": f"""あなたは安田章紀(やすだあきのり)博士です。京都大学に所属していて、チベット仏教を専門にしています。生前の情報:{context_text}この情報を参考に日本語で答えてください。"""},]
29
+
30
  for msg in st.session_state.messages:
31
  messages.append({"role": msg["role"], "content": msg["content"]})
32
 
pinecone.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os

import pinecone
from openai import OpenAI

# NOTE(review): this module is itself named pinecone.py, so `import pinecone`
# resolves to THIS file and shadows the installed pinecone-client package —
# `pinecone.init` / `pinecone.Index` will fail with AttributeError at import
# time. Rename this module (e.g. pinecone_store.py) and update the
# `from pinecone import query_text` import in main.py accordingly.

pinecone.init(api_key=os.environ["PINECONE_API_KEY"])
# Index name is now configurable via PINECONE_INDEX; falls back to the
# original hard-coded placeholder so existing behavior is unchanged.
index = pinecone.Index(os.environ.get("PINECONE_INDEX", "your-index"))

# Shared OpenAI client used by both upsert and query helpers below.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
def upsert_texts(texts, ids):
    """Embed *texts* with OpenAI and upsert them into the Pinecone index.

    Args:
        texts: Iterable of strings to embed.
        ids: Vector ids, parallel to *texts*.
    """
    texts = list(texts)
    # Guard: nothing to upsert for empty input (also avoids sending an
    # invalid empty request to the embeddings endpoint).
    if not texts:
        return
    # One batched API call instead of one request per text — the OpenAI
    # embeddings endpoint accepts a list input and returns the results
    # in the same order as the inputs.
    response = client.embeddings.create(
        model="text-embedding-3-large",
        input=texts,
    )
    embeddings = [item.embedding for item in response.data]
    # zip pairs each id with its embedding (the original comprehension
    # also shadowed the builtin `id`, which is avoided here).
    vectors = list(zip(ids, embeddings))
    index.upsert(vectors)
21
+
def query_text(query, top_k=5):
    """Embed *query* and return the nearest matches from the Pinecone index.

    Args:
        query: Free-text search string.
        top_k: Number of matches to return (default 5).

    Returns:
        The raw Pinecone query response, with vector metadata included.
    """
    # Compute the query embedding with the same model used at upsert time,
    # so query and stored vectors live in the same embedding space.
    embedding_response = client.embeddings.create(
        model="text-embedding-3-large",
        input=query,
    )
    query_vector = embedding_response.data[0].embedding
    return index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
    )
requirements.txt CHANGED
@@ -2,4 +2,5 @@ streamlit
2
  openai
3
  requests
4
  python-dotenv
5
-
 
 
2
  openai
3
  requests
4
  python-dotenv
5
+ pinecone-client
6
+ elevenlabs