Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,11 +8,10 @@ tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v
|
|
| 8 |
# Sentence-embedding encoder used to vectorize the corpus for retrieval.
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')

# Prepare dataset (Wikipedia dataset can be used)
# Example: [title, text] pairs
corpus = ["Article text 1", "Article text 2", "Article text 3"]

# Tokenize and encode: mean-pool the last hidden state into one (1, dim)
# vector per text. truncation/max_length cap inputs at the model's 512-token
# limit — without it, any long article crashes the encoder at runtime.
encoded_texts = [
    model(**tokenizer(text, return_tensors='pt', padding=True,
                      truncation=True, max_length=512))
    .last_hidden_state.mean(1).detach().numpy()
    for text in corpus
]

# Create FAISS index (exact L2 search over the embedding dimension).
dimension = encoded_texts[0].shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.vstack(encoded_texts))
|
| 21 |
|
| 22 |
def retrieve(query, k=5):
    """Return up to `k` corpus texts closest to `query` in embedding space.

    Args:
        query: free-text query string.
        k: number of neighbours requested (default 5).

    Returns:
        List of corpus strings ordered by ascending L2 distance. May be
        shorter than `k` when the index holds fewer vectors.
    """
    # Truncate long queries to the encoder's 512-token limit — otherwise the
    # model raises at runtime on oversized input.
    query_vector = model(
        **tokenizer(query, return_tensors='pt', truncation=True, max_length=512)
    ).last_hidden_state.mean(1).detach().numpy()
    distances, indices = index.search(query_vector, k)
    # FAISS pads with -1 when k exceeds the number of indexed vectors;
    # corpus[-1] would silently duplicate the last article, so drop them.
    return [corpus[i] for i in indices[0] if i != -1]
|
| 26 |
|
|
@@ -33,7 +32,7 @@ def generate_response(query):
|
|
| 33 |
# --- body fragment of generate_response(query); the def header and the
# --- construction of `context` are outside this view (see hunk context).
# NOTE(review): reloading Flan-T5 on every call is expensive — presumably a
# demo shortcut; consider hoisting the model to module scope.
flan_t5_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")

# Prompt combines the user query with the retrieved context.
input_text = f"Generate a human-like response: {query}. Context: {context}"
# Truncate the combined prompt to T5's 512-token input limit so that long
# retrieved context cannot crash tokenization/generation at runtime.
input_ids = flan_t5_tokenizer(input_text, return_tensors="pt",
                              truncation=True, max_length=512).input_ids

# Generate text with length constraint
generated_ids = flan_t5_model.generate(input_ids, max_length=1500)
|
|
|
|
| 8 |
# Encoder producing sentence embeddings for the retrieval corpus.
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')

# Prepare dataset (Wikipedia dataset can be used)
corpus = ["Article text 1", "Article text 2", "Article text 3"]


def _embed(text):
    # Mean-pooled last hidden state, detached to a (1, dim) numpy array.
    tokens = tokenizer(text, return_tensors='pt', padding=True,
                       truncation=True, max_length=512)
    return model(**tokens).last_hidden_state.mean(1).detach().numpy()


# Tokenize and encode every corpus entry.
encoded_texts = [_embed(text) for text in corpus]

# Create FAISS index sized to the embedding width and load all vectors.
dimension = encoded_texts[0].shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.vstack(encoded_texts))
|
| 20 |
|
| 21 |
def retrieve(query, k=5):
    """Embed `query` and return the k nearest corpus texts by L2 distance."""
    toks = tokenizer(query, return_tensors='pt', truncation=True, max_length=512)
    vec = model(**toks).last_hidden_state.mean(1).detach().numpy()
    distances, hits = index.search(vec, k)
    return [corpus[idx] for idx in hits[0]]
|
| 25 |
|
|
|
|
| 32 |
# NOTE(review): reloading Flan-T5 on every call is expensive — presumably a
# demo shortcut; consider hoisting the model to module scope.
flan_t5_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
|
| 33 |
|
| 34 |
# Prompt combines the user query with the retrieved context
# (context is built earlier in generate_response, outside this view).
input_text = f"Generate a human-like response: {query}. Context: {context}"
|
| 35 |
+
# truncation/max_length cap the prompt at T5's 512-token input limit.
input_ids = flan_t5_tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512).input_ids
|
| 36 |
|
| 37 |
# Generate text with length constraint
|
| 38 |
generated_ids = flan_t5_model.generate(input_ids, max_length=1500)
|