Spaces:

ThirdEyeData
/

Semantic-Search

Runtime error

saritha5 committed on Feb 7, 2023

Commit

d600cc0

1 Parent(s): f18f66d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

	@@ -11,3 +11,27 @@ df = pd.read_csv(input_datapath, index_col=0)
11
12	st.title("Semanti Search")
13

 st.title("Semanti Search")
+# Add a "combined" column: the stripped Summary as the title and the stripped Text as the content.
+df["combined"] = (
+    "Title: " + df.Summary.str.strip() + "; Content: " + df.Text.str.strip()
+)
+# embedding model parameters
+embedding_model = "text-embedding-ada-002"
+embedding_encoding = "cl100k_base"  # this is the encoding for text-embedding-ada-002
+max_tokens = 8000  # the maximum for text-embedding-ada-002 is 8191
+encoding = tiktoken.get_encoding(embedding_encoding)
+top_n = 500  # number of trailing rows kept after the token-length filter below
+# Count the tokens of each combined string, then omit reviews that are too long to embed.
+df["n_tokens"] = df.combined.apply(lambda x: len(encoding.encode(x)))
+df = df[df.n_tokens <= max_tokens].tail(top_n)
+datafile_path = "fine_food_reviews_with_embeddings_1k.csv"
+df = pd.read_csv(datafile_path)  # NOTE(review): rebinds df, so the filtered frame built above is no longer reachable under this name - confirm this is intended
+df["embedding"] = df.embedding.apply(eval).apply(np.array)  # NOTE(review): eval() runs each cell of the CSV as Python, presumably list-literal strings; only safe for a trusted file - consider ast.literal_eval