kinely committed on
Commit
98bee49
·
verified ·
1 Parent(s): 46d4b9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py CHANGED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer
from sentence_transformers import SentenceTransformer
import faiss
import torch

# Generator checkpoint used for the humanized-text rewriting.
model_name = "google/flan-t5-base"


@st.cache_resource
def _load_generator(name: str):
    """Load the seq2seq model and tokenizer once per process.

    Cached with st.cache_resource so Streamlit reruns (every widget
    interaction) do not re-download / re-instantiate the model.
    """
    mdl = T5ForConditionalGeneration.from_pretrained(name)
    tok = T5Tokenizer.from_pretrained(name)
    return mdl, tok


@st.cache_resource
def _load_embedder():
    """Load the sentence-transformer used for RAG retrieval (cached)."""
    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


# Module-level names kept identical to the original script so later code
# (the generate handler, the retrieval section) continues to work.
model, tokenizer = _load_generator(model_name)
embedder = _load_embedder()

# Build FAISS index (assuming you have precomputed embeddings for your retrieval corpus)
# embeddings = ...  # Your precomputed embeddings go here
# faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
# faiss_index.add(embeddings)

# Define the Streamlit interface
st.title("Humanized Text Generator")

# Text input from the user
user_input = st.text_area("Enter your query here", max_chars=2000)
# Button to generate text
if st.button("Generate Humanized Text"):
    if user_input:
        # Convert user input to an embedding for retrieval
        query_embedding = embedder.encode([user_input], convert_to_tensor=True)

        # Retrieve the top k related documents from your FAISS index
        # _, top_k_indices = faiss_index.search(query_embedding.cpu().numpy(), k=5)

        # Dummy document context (replace this with actual retrieved docs)
        # context = retrieve_documents(top_k_indices)
        context = "Sample context related to the query."  # For demonstration

        # Concatenate query and context
        input_text = f"{user_input} {context}"

        # Tokenize input and generate output.
        # Pass the full tokenizer output (**inputs) so generate() receives
        # attention_mask as well — the original passed only input_ids, which
        # lets the model attend to padding tokens.
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
        outputs = model.generate(**inputs, max_length=2000, num_return_sequences=1)

        # Decode the generated text
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Display the generated text
        st.write(generated_text)
    else:
        st.write("Please enter a query.")
import faiss
import numpy as np

# Load your corpus embeddings.
# NOTE(review): the original built the index directly from an undefined
# `embeddings` name (the np.load line was commented out), which raised
# NameError at import time. The load is now guarded so the app still starts
# when no precomputed embeddings file is present.
try:
    embeddings = np.load("embeddings.npy")
except (FileNotFoundError, OSError):
    embeddings = None

# Initialize FAISS index and add the embeddings (L2 distance).
faiss_index = None
if embeddings is not None:
    faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
    faiss_index.add(embeddings)

# When you have a query, encode it and retrieve the top documents.
# Guarded: only meaningful when an index exists and the user typed something.
if faiss_index is not None and user_input:
    query_embedding = embedder.encode([user_input], convert_to_tensor=True)
    _, top_k_indices = faiss_index.search(query_embedding.cpu().numpy(), k=5)
# Retrieve documents based on the top_k_indices
def retrieve_documents(top_k_indices):
    """Map FAISS search indices to corpus documents and join them.

    Parameters
    ----------
    top_k_indices : 2-D sequence of int
        Search result indices as returned by ``faiss_index.search`` (one row
        per query); only the first row is used.

    Returns
    -------
    str
        The selected documents joined by single spaces.

    Placeholder implementation: in a real system these indices would look up
    the actual documents of the retrieval corpus.
    """
    documents = ["Doc 1", "Doc 2", "Doc 3", "Doc 4", "Doc 5"]
    selected = []
    for idx in top_k_indices[0]:
        selected.append(documents[idx])
    return " ".join(selected)