Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -44,14 +44,30 @@ def add_text(history, text):
|
|
| 44 |
|
| 45 |
def retrieve_thoughts(query, ):
|
| 46 |
# print(db.similarity_search_with_score(query = query, k = k, fetch_k = k*10))
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
| 49 |
# TO-DO: What if user query doesn't match what we provide as documents
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
|
|
|
| 53 |
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
def qa_retrieve(query,):
|
| 57 |
|
|
@@ -72,11 +88,10 @@ def qa_retrieve(query,):
|
|
| 72 |
tier_1 = thoughts['tier 1']
|
| 73 |
tier_2 = thoughts['tier 2']
|
| 74 |
|
| 75 |
-
reference = [
|
| 76 |
-
|
| 77 |
-
tier_1 =
|
| 78 |
-
tier_2 =
|
| 79 |
-
|
| 80 |
print(f"QUERY: {query}\nTIER 1: {tier_1}\nTIER2: {tier_2}")
|
| 81 |
# print(f"DOCS RETRIEVED: {mp_docs.values}")
|
| 82 |
|
|
|
|
| 44 |
|
| 45 |
def retrieve_thoughts(query, *, k=1500, tier_1_max=0.7, tier_2_max=0.95, limit=5):
    """Retrieve scored chunks for *query* and group them into two tiers.

    The module-level ``db`` is queried with ``similarity_search_with_score``.
    Each hit is flattened into one row made of the document's metadata, its
    ``page_content`` and its score.  Rows are split by score into two tiers
    and, inside each tier, all chunks of the same document (same ``title``,
    ``url`` and ``_id``) are merged into a single row.

    Args:
        query: Text passed to the similarity search.
        k: Number of hits requested from ``db``.
        tier_1_max: Rows with ``score < tier_1_max`` go to tier 1.
        tier_2_max: Rows with ``tier_1_max <= score < tier_2_max`` go to
            tier 2.
        limit: Maximum number of documents kept per tier.

    Returns:
        ``{'tier 1': DataFrame, 'tier 2': DataFrame}``.  Both frames have the
        columns ``_id``, ``title``, ``url``, ``content`` and ``ref`` and hold
        at most ``limit`` rows, best (lowest) score first.  A tier with no
        matching rows is an empty frame with the same columns.

    Raises:
        KeyError: If the hits' metadata lacks ``title``, ``url``, ``_id`` or
            ``id`` (the columns used for grouping and chunk ordering).
    """
    # fetch_k is set to the number of entries in the index.
    # NOTE(review): this looks like a LangChain FAISS store -- confirm that
    # lower scores mean closer matches, which the thresholds below assume.
    docs_with_score = db.similarity_search_with_score(
        query=query, k=k, fetch_k=len(db.index_to_docstore_id)
    )

    rows = []
    for doc, score in docs_with_score:
        fields = dict(doc)
        rows.append(
            {**fields['metadata'], 'page_content': fields['page_content'], 'score': score}
        )

    # TODO: decide what to answer when the query matches none of the
    # documents; for now both tiers simply come back empty.
    if not rows:
        return {'tier 1': _collapse_chunks(None, limit), 'tier 2': _collapse_chunks(None, limit)}

    df = pd.DataFrame(rows)

    tier_1 = df[df['score'] < tier_1_max]
    # `&` is the boolean-mask operator; `>=` keeps a score that sits exactly
    # on the tier-1 boundary instead of dropping it from both tiers.
    tier_2 = df[(df['score'] >= tier_1_max) & (df['score'] < tier_2_max)]

    # Both tiers go through the same merge so they share one schema (the
    # previous code returned the un-merged tier-2 rows, without `content`).
    return {
        'tier 1': _collapse_chunks(tier_1, limit),
        'tier 2': _collapse_chunks(tier_2, limit),
    }


def _collapse_chunks(chunks, limit):
    """Merge the chunk rows of each document into one row, best score first."""
    columns = ['_id', 'title', 'url', 'content', 'ref']
    if chunks is None or chunks.empty:
        return pd.DataFrame(columns=columns)

    records = []
    for (title, url, doc_id), group in chunks.groupby(['title', 'url', '_id'], sort=False):
        # Chunks are stitched together in their `id` order.
        ordered = group.sort_values('id')
        records.append({
            '_id': doc_id,
            'title': title,
            'url': url,
            'content': "\n...\n".join(ordered['page_content'].values),
            'best_score': group['score'].min(),
        })

    if not records:
        # Every row had a missing grouping key, so groupby produced nothing.
        return pd.DataFrame(columns=columns)

    merged = (
        pd.DataFrame(records)
        .sort_values('best_score', kind='stable')
        .head(limit)  # positional, unlike the label-inclusive `.loc[:5]`
        .reset_index(drop=True)
    )
    # 0-based position within the tier; the visible qa_retrieve code reads
    # x['ref'] and adds 1 to number its citations.
    merged['ref'] = range(len(merged))
    return merged[columns]
|
| 71 |
|
| 72 |
def qa_retrieve(query,):
|
| 73 |
|
|
|
|
| 88 |
tier_1 = thoughts['tier 1']
|
| 89 |
tier_2 = thoughts['tier 2']
|
| 90 |
|
| 91 |
+
reference = tier_1_adjusted[['ref', 'url', 'title']].to_dict('records')
|
| 92 |
+
|
| 93 |
+
tier_1 = list(tier_1.apply(lambda x: f"[{int(x['ref'])+1}] title: {x['title']}\n Content: {x.content}", axis = 1).values)
|
| 94 |
+
tier_2 = list(tier_2.apply(lambda x: f"title: {x['title']}\n Content: {x.content}", axis = 1).values)
|
|
|
|
| 95 |
print(f"QUERY: {query}\nTIER 1: {tier_1}\nTIER2: {tier_2}")
|
| 96 |
# print(f"DOCS RETRIEVED: {mp_docs.values}")
|
| 97 |
|