Spaces:
Runtime error
Runtime error
dron3flyv3r committed on
Commit ·
6d1b7ca
1
Parent(s): a1d7b67
Add AutoTokenizer for text summarization
Browse files- app.py +15 -2
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
from huggingface_hub import InferenceClient
|
|
|
|
| 4 |
|
| 5 |
HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
|
| 6 |
|
|
@@ -15,19 +16,31 @@ def transcript_audio(audio_file) -> str:
|
|
| 15 |
def summarize_text(text: str, bullet_points: int, conclusion: bool) -> str:
|
| 16 |
llm_model = "google/gemma-7b-it"
|
| 17 |
api = InferenceClient(llm_model, token=HUGGINGFACE_API_KEY)
|
|
|
|
| 18 |
if conclusion:
|
| 19 |
-
|
| 20 |
else:
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
summary = api.text_generation(prompt, max_new_tokens=250, do_sample=True)
|
| 23 |
print(summary)
|
| 24 |
return summary
|
| 25 |
|
|
|
|
| 26 |
def control(audio_file, text: str, bullet_points: int, conclusion: bool) -> str:
    """Summarize either an audio recording or a piece of text.

    When ``audio_file`` is truthy it takes precedence: it is transcribed
    with ``transcript_audio`` and the transcript replaces ``text``.
    Otherwise ``text`` is summarized as given.

    Args:
        audio_file: Audio input forwarded to ``transcript_audio``; a falsy
            value means no audio was supplied.
        text: Text to summarize when there is no audio input.
        bullet_points: Number of bullet points requested in the summary.
        conclusion: Whether the summary should also include a conclusion.

    Returns:
        The summary produced by ``summarize_text``.
    """
    source_text = transcript_audio(audio_file) if audio_file else text
    return summarize_text(source_text, bullet_points, conclusion)
|
|
|
|
|
|
|
| 31 |
# Make a simple interface where the user can either input text and get a summary, or input an audio file and get a transcript and a summary
|
| 32 |
iface = gr.Interface(
|
| 33 |
fn=control,
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
+
from transformers import AutoTokenizer
|
| 5 |
|
| 6 |
HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
|
| 7 |
|
|
|
|
| 16 |
def summarize_text(text: str, bullet_points: int, conclusion: bool) -> str:
    """Generate a bullet-point summary of ``text`` with ``google/gemma-7b-it``.

    A single user message is built from the request, rendered into a prompt
    with the model tokenizer's chat template, and sent to the model through
    ``InferenceClient.text_generation``.

    Args:
        text: The text to summarize.
        bullet_points: Number of bullet points to ask the model for.
        conclusion: When true, the request also asks for a conclusion.

    Returns:
        The generated summary, which is also printed to stdout.
    """
    model_id = "google/gemma-7b-it"
    client = InferenceClient(model_id, token=HUGGINGFACE_API_KEY)
    chat_tokenizer = AutoTokenizer.from_pretrained(model_id, token=HUGGINGFACE_API_KEY)

    # The two requests differ only in whether a conclusion is asked for.
    request = (
        f"Summarize the following text into {bullet_points} bullet points and a conclusion: {text}"
        if conclusion
        else f"Summarize the following text into {bullet_points} bullet points: {text}"
    )
    messages = [{"role": "user", "content": request}]

    # tokenize=False returns the templated prompt as a string rather than
    # token ids; add_generation_prompt=True appends the model-turn marker.
    rendered_prompt = chat_tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    generated = client.text_generation(
        rendered_prompt,
        max_new_tokens=250,
        do_sample=True,
    )
    print(generated)
    return generated
|
| 35 |
|
| 36 |
+
|
| 37 |
def control(audio_file, text: str, bullet_points: int, conclusion: bool) -> str:
    """Summarize either an audio recording or a piece of text.

    When ``audio_file`` is truthy it takes precedence: it is transcribed
    with ``transcript_audio`` and the transcript replaces ``text``.
    Otherwise ``text`` is summarized as given.

    Args:
        audio_file: Audio input forwarded to ``transcript_audio``; a falsy
            value means no audio was supplied.
        text: Text to summarize when there is no audio input.
        bullet_points: Number of bullet points requested in the summary.
        conclusion: Whether the summary should also include a conclusion.

    Returns:
        The summary produced by ``summarize_text``.
    """
    source_text = transcript_audio(audio_file) if audio_file else text
    return summarize_text(source_text, bullet_points, conclusion)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
# Make a simple interface where the user can either input text and get a summary, or input an audio file and get a transcript and a summary
|
| 45 |
iface = gr.Interface(
|
| 46 |
fn=control,
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
transformers
|