CryptoScoutv1 committed on
Commit
de173bf
·
1 Parent(s): be999dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -10
app.py CHANGED
@@ -1,23 +1,36 @@
1
- from transformers import GPT2LMHeadModel, GPT2Tokenizer
2
  from datasets import load_dataset
3
  import gradio as gr
4
 
5
- # Load the dataset
6
- dataset = load_dataset("luisotorres/wikipedia-crypto-articles", split='train') # Adjust split as needed
 
 
 
7
 
8
- # Initialize tokenizer and model for GPT-2 text generation
9
- tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
10
- model = GPT2LMHeadModel.from_pretrained('gpt2')
 
 
 
11
 
12
- # Function to generate text
 
13
  def generate_text(prompt):
 
14
  encoded_input = tokenizer(prompt, return_tensors='pt')
15
- max_length = len(encoded_input["input_ids"][0]) + 50 # Example max length
 
 
16
  output_sequences = model.generate(input_ids=encoded_input["input_ids"], max_length=max_length)
 
 
17
  return tokenizer.decode(output_sequences[0], skip_special_tokens=True)
18
 
19
- # Creating a Gradio interface
 
20
  iface = gr.Interface(fn=generate_text, inputs="text", outputs="text")
21
 
22
- # Launching the interface
23
  iface.launch()
 
1
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
import gradio as gr

# --- Dataset Loading ---
# NOTE(review): `dataset` is loaded here but never referenced anywhere else
# in this file — confirm it is still needed, since the download slows startup.
dataset_name = "luisotorres/wikipedia-crypto-articles"
dataset_split = 'train'
dataset = load_dataset(dataset_name, split=dataset_split)

# --- Model and Tokenizer Setup ---
# Any causal-LM checkpoint from the Hugging Face Hub can be dropped in here.
model_id = "gpt2"  # e.g. "gpt2", "EleutherAI/gpt-neo-2.7B", ...
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
17
 
18
# --- Text Generation Function ---
def generate_text(prompt):
    """Generate a continuation of *prompt* with the module-level causal LM.

    Args:
        prompt: Input text to continue.

    Returns:
        The decoded generated sequence (prompt + up to 50 new tokens),
        with special tokens stripped.
    """
    # Tokenize the input prompt text.
    encoded_input = tokenizer(prompt, return_tensors='pt')

    # Allow up to 50 tokens beyond the prompt length. Adjust as needed.
    max_length = len(encoded_input["input_ids"][0]) + 50

    # Fix: forward the attention mask returned by the tokenizer. Without it,
    # transformers warns and may infer a wrong mask when padding is present.
    output_sequences = model.generate(
        input_ids=encoded_input["input_ids"],
        attention_mask=encoded_input["attention_mask"],
        max_length=max_length,
    )

    # Decode and return the generated text.
    return tokenizer.decode(output_sequences[0], skip_special_tokens=True)
30
 
31
# --- Gradio Interface Setup ---
# Minimal UI: one text box in, the generated continuation out.
iface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
)

# --- Launch the Interface ---
iface.launch()