kinely committed on
Commit
f418744
·
verified ·
1 Parent(s): f7427af

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import AutoTokenizer, AutoModel, T5Tokenizer, T5ForConditionalGeneration
3
+ import faiss
4
+ import numpy as np
5
+
6
# Sentence-embedding backbone: the same checkpoint supplies both the
# tokenizer and the encoder.
_EMBEDDING_CHECKPOINT = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(_EMBEDDING_CHECKPOINT)
model = AutoModel.from_pretrained(_EMBEDDING_CHECKPOINT)

# Placeholder knowledge base; swap in real passages (e.g. Wikipedia
# [title, text] pairs) when a dataset is available.
corpus = ["Article text 1", "Article text 2", "Article text 3"]

# Embed every passage on its own: run the encoder, average the last hidden
# state over axis 1, and convert the result to a NumPy array.
encoded_texts = []
for text in corpus:
    inputs = tokenizer(text, return_tensors='pt', padding=True)
    hidden_states = model(**inputs).last_hidden_state
    encoded_texts.append(hidden_states.mean(1).detach().numpy())

# Build a flat L2 FAISS index sized from the second axis of the first
# embedding, then add all embeddings stacked row-wise.
dimension = encoded_texts[0].shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.vstack(encoded_texts))
21
+
22
def retrieve(query, k=5):
    """Return up to ``k`` corpus entries nearest to ``query``.

    The query is embedded the same way as the corpus (encoder last hidden
    state averaged over axis 1) and looked up in the module-level FAISS
    ``index``.

    Args:
        query: Text to search for.
        k: Maximum number of entries to return. Values larger than the
            number of indexed vectors are clamped.

    Returns:
        A list of strings from ``corpus`` in the order FAISS returns them;
        empty when ``k`` is not positive or the index is empty.
    """
    # FAISS pads the result with label -1 when fewer than k neighbours exist;
    # indexing corpus with -1 would silently return the last document, so
    # clamp k here and filter negative labels below.
    k = min(k, index.ntotal)
    if k <= 0:
        return []

    encoder_output = model(**tokenizer(query, return_tensors='pt'))
    query_vector = encoder_output.last_hidden_state.mean(1).detach().numpy()

    _distances, indices = index.search(query_vector, k)
    return [corpus[i] for i in indices[0] if i >= 0]
26
+
27
@st.cache_resource
def _load_flan_t5():
    """Load the FLAN-T5 tokenizer and model once and reuse them afterwards.

    Streamlit re-executes the script on every interaction, so the load is
    wrapped in ``st.cache_resource`` instead of running on each request.
    """
    flan_t5_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
    flan_t5_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
    return flan_t5_tokenizer, flan_t5_model


def generate_response(query):
    """Generate a response to ``query`` grounded in retrieved corpus text.

    Args:
        query: The user's question or prompt.

    Returns:
        The decoded FLAN-T5 output as a string, with special tokens removed.
    """
    retrieved_docs = retrieve(query)
    context = " ".join(retrieved_docs)

    # Reuse the cached model rather than reloading it from disk on every call.
    flan_t5_tokenizer, flan_t5_model = _load_flan_t5()

    input_text = f"Generate a human-like response: {query}. Context: {context}"
    input_ids = flan_t5_tokenizer(input_text, return_tensors="pt").input_ids

    # max_length bounds the number of generated *tokens*, not words.
    generated_ids = flan_t5_model.generate(input_ids, max_length=1500)
    response = flan_t5_tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return response
42
+
43
def trim_to_word_limit(text, word_limit=1500):
    """Return ``text`` cut down to at most ``word_limit`` words.

    Words are whitespace-separated tokens. Text already within the limit is
    returned unchanged; trimmed text is rejoined with single spaces.

    Args:
        text: The string to trim.
        word_limit: Maximum number of words to keep. Zero or negative values
            keep no words.

    Returns:
        The original string, or its first ``word_limit`` words joined by
        single spaces.
    """
    # A negative limit would make the slice drop words from the end rather
    # than enforce a limit, so treat it as zero.
    limit = max(word_limit, 0)
    words = text.split()
    if len(words) > limit:
        return " ".join(words[:limit])
    return text
48
+
49
# Streamlit UI
st.title("Humanized Text Generator with RAG")

# Input for the query
query = st.text_input("Enter your query:")

# Generate button
if st.button("Generate"):
    if not query.strip():
        # Skip retrieval and generation when there is nothing to answer.
        st.warning("Please enter a query before generating.")
    else:
        with st.spinner("Generating response..."):
            response = generate_response(query)
            response = trim_to_word_limit(response)
        st.write("### Generated Response:")
        st.write(response)

# Additional info or about section
st.write("This app uses FAISS, sentence-transformers, and FLAN-T5 to generate contextually relevant human-like responses.")