kinely committed on
Commit
4b8e1c7
·
verified ·
1 Parent(s): b0f9153

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -79
app.py CHANGED
@@ -1,96 +1,37 @@
1
# Third-party dependencies for the retrieval-augmented generation app.
import faiss
import streamlit as st
import torch
import wikipediaapi
from sentence_transformers import SentenceTransformer
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Wikipedia client; the custom user-agent string identifies this app
# (required by the Wikimedia API etiquette for automated access).
wiki_wiki = wikipediaapi.Wikipedia(
    language='en',
    user_agent='HumanizedTextApp/1.0 (kinelyaydenseo19@gmail.com)'
)
13
-
14
def fetch_wikipedia_articles(titles):
    """Return the full text of each existing Wikipedia page in *titles*.

    Titles whose page does not exist are skipped; a notice is written to
    the Streamlit app for each missing one.
    """
    corpus = []
    for title in titles:
        page = wiki_wiki.page(title)
        if not page.exists():
            st.write(f"Page for '{title}' does not exist.")
            continue
        corpus.append(page.text)
    return corpus
24
-
25
# Sentence encoder used for both corpus and query embeddings.
embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Topics whose Wikipedia articles form the retrieval corpus.
titles = [
    "Crypto",
    "Finance",
    "Technology",
    "Healthcare",
    "Education"
]
st.write("Fetching Wikipedia articles...")
corpus = fetch_wikipedia_articles(titles)

if not corpus:
    # BUGFIX: previously the app only warned here and then fell through to
    # encode an empty corpus, so `embeddings_np.shape[1]` / the FAISS index
    # build below would crash. Halt the script run instead.
    st.write("No articles found. Please check the titles.")
    st.stop()
else:
    st.write("Articles fetched successfully.")

# Embed every document once; FAISS needs a contiguous float32 numpy matrix.
st.write("Generating embeddings...")
embeddings = embedder.encode(corpus, convert_to_tensor=True)
embeddings_np = embeddings.cpu().numpy()

# Exact (brute-force) L2 index over the corpus embeddings.
faiss_index = faiss.IndexFlatL2(embeddings_np.shape[1])
faiss_index.add(embeddings_np)

# Seq2seq model + tokenizer used to generate the final answer.
model_name = "google/flan-t5-base"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)
58
 
59
# Streamlit interface
st.title("Humanized AI Text Generator")

# Free-form query from the user.
user_input = st.text_area("Enter your query here (e.g., about a country, concept, etc.)", height=200)

if st.button("Generate Humanized Text"):
    if user_input.strip():
        # Embed the query and retrieve the 5 nearest corpus documents.
        query_embedding = embedder.encode([user_input], convert_to_tensor=True)
        _, top_k_indices = faiss_index.search(query_embedding.cpu().numpy(), k=5)

        # Concatenate the retrieved documents into one context string
        # (inlined; the original wrapped this join in a nested helper).
        context = " ".join(corpus[i] for i in top_k_indices[0])

        if not context:
            st.write("No relevant context found. Please try a different query.")
        else:
            # Query + retrieved context is the seq2seq model's input.
            input_text = f"{user_input} {context}"
            inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, truncation=True)
            outputs = model.generate(inputs.input_ids, max_length=2000, num_return_sequences=1)
            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
            st.write(generated_text)
    else:
        st.write("Please enter a valid query.")
 
 
1
# Grouped at the top: the original interleaved imports with setup code
# (streamlit/torch were imported after the model was already loaded).
import streamlit as st
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the BookCorpus74M dataset.
# NOTE(review): `ds` is never referenced anywhere below, yet this triggers a
# large download on every cold start — confirm it is needed before removing.
ds = load_dataset("raddwolf/BookCorpus74M")

# Causal-LM tokenizer and model used for generation.
tokenizer = AutoTokenizer.from_pretrained("FabbriSimo01/Facebook_opt_1.3b_Quantized")
model = AutoModelForCausalLM.from_pretrained("FabbriSimo01/Facebook_opt_1.3b_Quantized")
 
 
14
 
15
# Streamlit interface
st.title("Humanized Text Generation App")

# Free-form query from the user.
user_input = st.text_area("Enter your query here:", height=200)

if st.button("Generate Humanized Text"):
    if user_input.strip():
        # Tokenize the prompt, truncating to the model's usable window.
        inputs = tokenizer(user_input, return_tensors="pt", truncation=True, max_length=512)

        # Inference only, so skip autograd bookkeeping.
        # BUGFIX: pass attention_mask explicitly — generate() otherwise has
        # to infer it from pad_token_id, which Transformers warns is
        # unreliable and can corrupt output when padding is present.
        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,
                max_length=2000,
                num_return_sequences=1,
            )

        # Decode the generated token ids back to text.
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Display the generated text
        st.write(generated_text)
    else:
        st.write("Please enter a valid query.")
37
+