kinely committed on
Commit
b07fc63
·
verified ·
1 Parent(s): f418744

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -4
app.py CHANGED
@@ -8,11 +8,10 @@ tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v
8
  model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
9
 
10
  # Prepare dataset (Wikipedia dataset can be used)
11
- # Example: [title, text] pairs
12
  corpus = ["Article text 1", "Article text 2", "Article text 3"]
13
 
14
  # Tokenize and encode
15
- encoded_texts = [model(**tokenizer(text, return_tensors='pt', padding=True)).last_hidden_state.mean(1).detach().numpy() for text in corpus]
16
 
17
  # Create FAISS index
18
  dimension = encoded_texts[0].shape[1]
@@ -20,7 +19,7 @@ index = faiss.IndexFlatL2(dimension)
20
  index.add(np.vstack(encoded_texts))
21
 
22
  def retrieve(query, k=5):
23
- query_vector = model(**tokenizer(query, return_tensors='pt')).last_hidden_state.mean(1).detach().numpy()
24
  distances, indices = index.search(query_vector, k)
25
  return [corpus[i] for i in indices[0]]
26
 
@@ -33,7 +32,7 @@ def generate_response(query):
33
  flan_t5_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
34
 
35
  input_text = f"Generate a human-like response: {query}. Context: {context}"
36
- input_ids = flan_t5_tokenizer(input_text, return_tensors="pt").input_ids
37
 
38
  # Generate text with length constraint
39
  generated_ids = flan_t5_model.generate(input_ids, max_length=1500)
 
8
  model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
9
 
10
  # Prepare dataset (Wikipedia dataset can be used)
 
11
  corpus = ["Article text 1", "Article text 2", "Article text 3"]
12
 
13
  # Tokenize and encode
14
+ encoded_texts = [model(**tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)).last_hidden_state.mean(1).detach().numpy() for text in corpus]
15
 
16
  # Create FAISS index
17
  dimension = encoded_texts[0].shape[1]
 
19
  index.add(np.vstack(encoded_texts))
20
 
21
  def retrieve(query, k=5):
22
+ query_vector = model(**tokenizer(query, return_tensors='pt', truncation=True, max_length=512)).last_hidden_state.mean(1).detach().numpy()
23
  distances, indices = index.search(query_vector, k)
24
  return [corpus[i] for i in indices[0]]
25
 
 
32
  flan_t5_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
33
 
34
  input_text = f"Generate a human-like response: {query}. Context: {context}"
35
+ input_ids = flan_t5_tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512).input_ids
36
 
37
  # Generate text with length constraint
38
  generated_ids = flan_t5_model.generate(input_ids, max_length=1500)