Corin1998 committed on
Commit
427fa46
·
verified ·
1 Parent(s): 0881450

Update rag/retriever.py

Browse files
Files changed (1) hide show
  1. rag/retriever.py +4 -14
rag/retriever.py CHANGED
@@ -1,24 +1,14 @@
1
- from app.deps import search, embed_texts
2
- import numpy as np
3
 
4
  def retrieve(query: str, top_k=8):
5
- hits = search(query, top_k=top_k)
6
- return hits
7
 
8
  def format_citations(hits):
9
- # 重複URLをまとめて [1], [2]...を付与
10
  url_to_id = {}
11
- citations = {}
12
  for h in hits:
13
  u = h["source_url"]
14
  if u not in url_to_id:
15
  url_to_id[u] = len(url_to_id) + 1
16
- citations.append((url_to_id[u], u))
17
- # unique 保持
18
- uniq = []
19
- seen = set()
20
- for cid, u in citations:
21
- if u in seen: continue
22
- seen.add(u)
23
- uniq.append((cid, u))
24
  return uniq
 
1
+ from app.deps import search
 
2
 
3
  def retrieve(query: str, top_k=8):
4
+ return search(query, top_k=top_k)
 
5
 
6
  def format_citations(hits):
 
7
  url_to_id = {}
8
+ uniq = []
9
  for h in hits:
10
  u = h["source_url"]
11
  if u not in url_to_id:
12
  url_to_id[u] = len(url_to_id) + 1
13
+ uniq.append((url_to_id[u], u))
 
 
 
 
 
 
 
14
  return uniq