Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from transformers import pipeline
|
| 3 |
from sentence_transformers import SentenceTransformer, util
|
| 4 |
import math
|
| 5 |
|
|
@@ -12,7 +12,9 @@ translation_models = {
|
|
| 12 |
}
|
| 13 |
|
| 14 |
# Initialize summarization pipeline with a specified model
|
| 15 |
-
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Initialize translation pipeline
|
| 18 |
def get_translator(language):
|
|
@@ -38,23 +40,15 @@ def generate_bullet_points(text):
|
|
| 38 |
|
| 39 |
# Helper function to split text into chunks
|
| 40 |
def split_text(text, max_tokens=1024):
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
current_tokens = 0
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
current_tokens = sentence_tokens
|
| 52 |
-
else:
|
| 53 |
-
current_chunk += sentence + ". "
|
| 54 |
-
current_tokens += sentence_tokens
|
| 55 |
-
|
| 56 |
-
if current_chunk:
|
| 57 |
-
chunks.append(current_chunk.strip())
|
| 58 |
|
| 59 |
return chunks
|
| 60 |
|
|
@@ -82,12 +76,12 @@ def process_text(input_text, language):
|
|
| 82 |
iface = gr.Interface(
|
| 83 |
fn=process_text,
|
| 84 |
inputs=[
|
| 85 |
-
gr.Textbox(label="Input Text", placeholder="Paste your text here..."),
|
| 86 |
gr.Dropdown(choices=["Vietnamese", "Japanese", "Thai", "Spanish"], label="Translate to", value="Vietnamese")
|
| 87 |
],
|
| 88 |
outputs=[
|
| 89 |
-
gr.Textbox(label="Bullet Points"),
|
| 90 |
-
gr.Textbox(label="Translated Bullet Points")
|
| 91 |
],
|
| 92 |
title="Text to Bullet Points and Translation",
|
| 93 |
description="Paste any text, and the program will summarize it into bullet points. Optionally, translate the bullet points into Vietnamese, Japanese, Thai, or Spanish."
|
|
@@ -97,3 +91,4 @@ iface.launch()
|
|
| 97 |
|
| 98 |
|
| 99 |
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from transformers import pipeline, AutoTokenizer
|
| 3 |
from sentence_transformers import SentenceTransformer, util
|
| 4 |
import math
|
| 5 |
|
|
|
|
| 12 |
}
|
| 13 |
|
| 14 |
# Initialize summarization pipeline with a specified model
|
| 15 |
+
model_name = "sshleifer/distilbart-cnn-12-6"
|
| 16 |
+
summarizer = pipeline("summarization", model=model_name)
|
| 17 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 18 |
|
| 19 |
# Initialize translation pipeline
|
| 20 |
def get_translator(language):
|
|
|
|
| 40 |
|
| 41 |
# Helper function to split text into chunks
|
| 42 |
def split_text(text, max_tokens=1024):
    """Split *text* into consecutive chunks that fit a token budget.

    The text is tokenized once with the module-level ``tokenizer``, the
    resulting ids are cut into fixed-size windows, and each window is decoded
    back into a string.

    Args:
        text: The input string to split.
        max_tokens: Upper bound on the tokenized length of each chunk,
            *including* the special tokens the tokenizer adds when the chunk
            is tokenized again downstream. Must be at least 1.

    Returns:
        A list of chunk strings in their original order. Always contains at
        least one element; text that yields no tokens produces ``[""]``.

    Raises:
        ValueError: If ``max_tokens`` is smaller than 1.
    """
    if max_tokens < 1:
        raise ValueError(f"max_tokens must be >= 1, got {max_tokens}")

    # Tokenize without BOS/EOS so that only real content is counted and
    # sliced. Plain Python lists are enough here; no tensor is needed.
    token_ids = tokenizer(
        text, add_special_tokens=False, truncation=False
    )["input_ids"]

    # Each decoded chunk is tokenized again by whatever consumes it, and that
    # pass adds the special tokens back. Reserve room for them so that a full
    # chunk does not end up longer than max_tokens.
    # NOTE(review): decoding and re-encoding at a window boundary may shift
    # the token count slightly — confirm the limit holds for your inputs.
    content_budget = max(1, max_tokens - tokenizer.num_special_tokens_to_add())

    if not token_ids:
        # Keep the "at least one chunk" behaviour callers have seen so far.
        return [""]

    return [
        tokenizer.decode(
            token_ids[start:start + content_budget],
            skip_special_tokens=True,
        )
        for start in range(0, len(token_ids), content_budget)
    ]
|
| 54 |
|
|
|
|
| 76 |
iface = gr.Interface(
|
| 77 |
fn=process_text,
|
| 78 |
inputs=[
|
| 79 |
+
gr.Textbox(label="Input Text", placeholder="Paste your text here...", lines=10),
|
| 80 |
gr.Dropdown(choices=["Vietnamese", "Japanese", "Thai", "Spanish"], label="Translate to", value="Vietnamese")
|
| 81 |
],
|
| 82 |
outputs=[
|
| 83 |
+
gr.Textbox(label="Bullet Points", lines=10),
|
| 84 |
+
gr.Textbox(label="Translated Bullet Points", lines=10)
|
| 85 |
],
|
| 86 |
title="Text to Bullet Points and Translation",
|
| 87 |
description="Paste any text, and the program will summarize it into bullet points. Optionally, translate the bullet points into Vietnamese, Japanese, Thai, or Spanish."
|
|
|
|
| 91 |
|
| 92 |
|
| 93 |
|
| 94 |
+
|