gcrocetti commited on
Commit
35c1873
·
verified ·
1 Parent(s): 6e08d40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -110
app.py CHANGED
@@ -1,110 +1,108 @@
1
- %%writefile app.py
2
-
3
- import streamlit as st
4
- from transformers import AutoModelForCausalLM, AutoTokenizer
5
- import torch
6
- import os
7
- from huggingface_hub import login
8
-
9
-
10
- model_name_tinyllama = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
11
- tokenizer_tinyllama = AutoTokenizer.from_pretrained(model_name_tinyllama)
12
- model_tinyllama = AutoModelForCausalLM.from_pretrained(model_name_tinyllama,torch_dtype=torch.float32,device_map={"": "cpu"})
13
-
14
- def summarize_tinyllama(article):
15
- # For causal models like TinyLlama, summarization isn't a direct task like with encoder-decoder models.
16
- # We can prompt it to continue a summary.
17
- prompt="Summarize the following article clearly and concisely:"
18
- input_text = f"{prompt}\n{article}\nSummary:"
19
- inputs = tokenizer_tinyllama(input_text, return_tensors="pt", max_length=1024, truncation=True)
20
-
21
- # Generate tokens - the model will try to complete the input prompt.
22
- # We need to adjust generation parameters for open-ended generation.
23
- # max_new_tokens controls how much new text is generated after the prompt.
24
- outputs = model_tinyllama.generate(
25
- inputs["input_ids"],
26
- attention_mask=inputs["attention_mask"],
27
- max_new_tokens=500, # Generate up to 300 new tokens for the summary
28
- do_sample=True, # Don't sample, use greedy decoding
29
- temperature=0.7,
30
- min_new_tokens=150,
31
- top_p=0.9,
32
- pad_token_id=tokenizer_tinyllama.eos_token_id, # Pad with EOS token if needed
33
- )
34
-
35
- # Decode the entire output sequence.
36
- generated_text = tokenizer_tinyllama.decode(outputs[0], skip_special_tokens=True)
37
-
38
-
39
- # The generated text will include the original prompt. We need to extract the summary part.
40
- # This is a simple approach, more sophisticated parsing might be needed depending on prompt and output.
41
- summary_start_index = generated_text.find("Summary:") + len("Summary:")
42
- summary = generated_text[summary_start_index:].strip()
43
-
44
- return summary
45
-
46
- def answer_question_tinyllama(article, question):
47
- # Formulate the prompt to guide the TinyLlama model to answer the question based on the article.
48
- # We ask the model to act as an AI answering a question based on the provided text.
49
- input_text = f"From this Article: {article}\n\n Answer the below Question: {question}\n\nAnswer:"
50
-
51
- # Tokenize the input text
52
- # Truncate if the combined article and question is too long
53
- inputs = tokenizer_tinyllama(input_text, return_tensors="pt", max_length=1024, truncation=True)
54
-
55
- # Generate the answer using the model.
56
- # We use generate with parameters suitable for generating a concise answer.
57
- outputs = model_tinyllama.generate(
58
- inputs["input_ids"],
59
- attention_mask=inputs["attention_mask"],
60
- max_new_tokens=500, # Generate up to 100 new tokens for the answer
61
- do_sample=True, # Use sampling to potentially get more varied answers
62
- temperature=0.7, # Control randomness
63
- top_p=0.9, # Nucleus sampling
64
- pad_token_id=tokenizer_tinyllama.eos_token_id, # Pad with EOS token if needed
65
- )
66
- # Decode the generated sequence
67
- generated_text = tokenizer_tinyllama.decode(outputs[0], skip_special_tokens=True)
68
-
69
- # The generated text will include the original prompt. We need to extract the answer part.
70
- # This is a simple approach, more sophisticated parsing might be needed depending on prompt and output.
71
- answer_start_index = generated_text.find("Answer:") + len("Answer:")
72
- answer = generated_text[answer_start_index:].strip()
73
-
74
- # Basic cleanup: remove potential repetition of the question or prompt in the answer
75
- if answer.startswith(question):
76
- answer = answer[len(question):].strip()
77
-
78
- return answer
79
-
80
-
81
- st.title("Smart Article Insights Generator")
82
- st.markdown("Summarize an article or ask a question about it.")
83
-
84
- mode = st.radio("Select Mode", ["Summarize", "Answer Question"])
85
-
86
- article_input = st.text_area("Article Text", height=300, placeholder="Paste the article here...")
87
-
88
- question_input = None
89
- if mode == "Answer Question":
90
- question_input = st.text_input("Question", placeholder="Enter your question here...")
91
-
92
- if st.button("Process"):
93
- if mode == "Summarize":
94
- if article_input:
95
- with st.spinner("Generating summary..."):
96
- output = summarize_tinyllama(article_input)
97
- st.subheader("Summary")
98
- st.write(output)
99
- else:
100
- st.warning("Please provide an article to summarize.")
101
- elif mode == "Answer Question":
102
- if article_input and question_input:
103
- with st.spinner("Generating answer..."):
104
- output = answer_question_tinyllama(article_input, question_input)
105
- st.subheader("Answer")
106
- st.write(output)
107
- elif not article_input:
108
- st.warning("Please provide an article to answer the question from.")
109
- elif not question_input:
110
- st.warning("Please provide a question to answer.")
 
1
+ import streamlit as st
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+ import os
5
+ from huggingface_hub import login
6
+
7
+
8
# Hugging Face Hub identifier of the chat-tuned TinyLlama checkpoint (1.1B params).
model_name_tinyllama = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# Tokenizer matching the checkpoint; loaded once at module import so every
# Streamlit rerun reuses it.
tokenizer_tinyllama = AutoTokenizer.from_pretrained(model_name_tinyllama)
# Load weights in float32 and pin all modules to CPU so the app runs on
# machines without a GPU. NOTE(review): loading at import time blocks app
# startup for the duration of the download — confirm that is acceptable.
model_tinyllama = AutoModelForCausalLM.from_pretrained(model_name_tinyllama,torch_dtype=torch.float32,device_map={"": "cpu"})
11
+
12
def summarize_tinyllama(article):
    """Generate a summary of ``article`` with the TinyLlama chat model.

    TinyLlama is a causal (decoder-only) LM, so summarization is not a
    native task as it is for encoder-decoder models; instead the article is
    wrapped in an instruction ending with a "Summary:" marker and the model
    is asked to continue the text.

    Args:
        article: Plain-text article to summarize.

    Returns:
        The generated summary string (text after the final "Summary:"
        marker), or the whole completion if the marker was truncated away.
    """
    prompt = "Summarize the following article clearly and concisely:"
    input_text = f"{prompt}\n{article}\nSummary:"
    # Truncate long articles so the prompt fits within 1024 tokens.
    inputs = tokenizer_tinyllama(input_text, return_tensors="pt", max_length=1024, truncation=True)

    # Sampled generation: min/max_new_tokens bound the summary length,
    # temperature and top_p control the randomness of the sampling.
    outputs = model_tinyllama.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=500,   # generate up to 500 new tokens for the summary
        min_new_tokens=150,   # force a reasonably detailed summary
        do_sample=True,       # nucleus sampling (NOT greedy decoding)
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer_tinyllama.eos_token_id,  # LLaMA has no pad token
    )

    # The decoded sequence contains the prompt followed by the completion.
    generated_text = tokenizer_tinyllama.decode(outputs[0], skip_special_tokens=True)

    # Extract everything after the LAST "Summary:" marker. rfind + an
    # explicit absence check fixes two defects of the previous find()-based
    # slice: (1) the article text itself may contain "Summary:", and
    # (2) when truncation drops the marker, find() returns -1 and the old
    # code sliced from a meaningless offset.
    marker = "Summary:"
    marker_index = generated_text.rfind(marker)
    if marker_index == -1:
        return generated_text.strip()
    return generated_text[marker_index + len(marker):].strip()
43
+
44
def answer_question_tinyllama(article, question):
    """Answer ``question`` using ``article`` as context via TinyLlama.

    The article and question are combined into a single prompt ending with
    an "Answer:" marker, and the causal LM is asked to continue the text.

    Args:
        article: Plain-text article providing the context.
        question: Question to answer from the article.

    Returns:
        The generated answer string (text after the final "Answer:" marker),
        or the whole completion if the marker was truncated away.
    """
    input_text = f"From this Article: {article}\n\n Answer the below Question: {question}\n\nAnswer:"

    # Truncate so the combined article + question fits within 1024 tokens.
    inputs = tokenizer_tinyllama(input_text, return_tensors="pt", max_length=1024, truncation=True)

    # Sampled generation tuned for a reasonably varied, bounded-length answer.
    outputs = model_tinyllama.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=500,   # generate up to 500 new tokens for the answer
        do_sample=True,       # sampling gives more varied answers than greedy
        temperature=0.7,      # control randomness
        top_p=0.9,            # nucleus sampling
        pad_token_id=tokenizer_tinyllama.eos_token_id,  # LLaMA has no pad token
    )
    # The decoded sequence contains the prompt followed by the completion.
    generated_text = tokenizer_tinyllama.decode(outputs[0], skip_special_tokens=True)

    # Extract everything after the LAST "Answer:" marker. rfind + an absence
    # check fixes the previous find()-based slice, which returned a garbage
    # offset (-1 + len("Answer:")) when truncation removed the marker, and
    # could match an "Answer:" occurring inside the article text.
    marker = "Answer:"
    marker_index = generated_text.rfind(marker)
    if marker_index == -1:
        answer = generated_text.strip()
    else:
        answer = generated_text[marker_index + len(marker):].strip()

    # Basic cleanup: drop a leading repetition of the question, if present.
    if answer.startswith(question):
        answer = answer[len(question):].strip()

    return answer
77
+
78
+
79
# ---- Streamlit UI -------------------------------------------------------
st.title("Smart Article Insights Generator")
st.markdown("Summarize an article or ask a question about it.")

mode = st.radio("Select Mode", ["Summarize", "Answer Question"])

article_input = st.text_area("Article Text", height=300, placeholder="Paste the article here...")

# The question field is only rendered in Q&A mode.
question_input = None
if mode == "Answer Question":
    question_input = st.text_input("Question", placeholder="Enter your question here...")

if st.button("Process"):
    if mode == "Summarize":
        # Guard clause: an article is required before summarizing.
        if not article_input:
            st.warning("Please provide an article to summarize.")
        else:
            with st.spinner("Generating summary..."):
                output = summarize_tinyllama(article_input)
                st.subheader("Summary")
                st.write(output)
    elif mode == "Answer Question":
        if article_input and question_input:
            with st.spinner("Generating answer..."):
                output = answer_question_tinyllama(article_input, question_input)
                st.subheader("Answer")
                st.write(output)
        elif not article_input:
            st.warning("Please provide an article to answer the question from.")
        else:
            # Reaching here means the article is present but the question is
            # missing (the only remaining possibility).
            st.warning("Please provide a question to answer.")