Update app.py
Browse files
app.py
CHANGED
|
@@ -15,10 +15,11 @@ translation_models = {
|
|
| 15 |
'Spanish': "Helsinki-NLP/opus-mt-en-es"
|
| 16 |
}
|
| 17 |
|
| 18 |
-
# Initialize summarization
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
|
|
|
| 22 |
|
| 23 |
# Initialize translation pipeline
|
| 24 |
def get_translator(language):
|
|
@@ -45,12 +46,12 @@ def split_text(text, max_tokens=1024):
|
|
| 45 |
return chunks
|
| 46 |
|
| 47 |
# Helper function to summarize text
|
| 48 |
-
def summarize_text(text):
|
| 49 |
chunks = split_text(text)
|
| 50 |
summaries = []
|
| 51 |
for chunk in chunks:
|
| 52 |
try:
|
| 53 |
-
summary =
|
| 54 |
summaries.append(summary)
|
| 55 |
except Exception as e:
|
| 56 |
print(f"Error summarizing chunk: {chunk}\nError: {e}")
|
|
@@ -68,9 +69,9 @@ def translate_text(text, language):
|
|
| 68 |
return text
|
| 69 |
return text
|
| 70 |
|
| 71 |
-
def process_text(input_text, language):
|
| 72 |
print(f"Input text: {input_text[:500]}...") # Show only the first 500 characters for brevity
|
| 73 |
-
summary = summarize_text(input_text)
|
| 74 |
print(f"Summary: {summary[:500]}...") # Show only the first 500 characters for brevity
|
| 75 |
bullet_points = generate_bullet_points(summary)
|
| 76 |
print(f"Bullet Points: {bullet_points}")
|
|
@@ -95,6 +96,7 @@ iface = gr.Interface(
|
|
| 95 |
fn=process_text,
|
| 96 |
inputs=[
|
| 97 |
gr.Textbox(label="Input Text", placeholder="Paste your text here...", lines=10),
|
|
|
|
| 98 |
gr.Dropdown(choices=["Vietnamese", "Japanese", "Thai", "Spanish"], label="Translate to", value="Vietnamese")
|
| 99 |
],
|
| 100 |
outputs=[
|
|
@@ -102,7 +104,7 @@ iface = gr.Interface(
|
|
| 102 |
gr.Textbox(label="Translated Bullet Points", lines=10)
|
| 103 |
],
|
| 104 |
title="Text to Bullet Points and Translation",
|
| 105 |
-
description="Paste any text,
|
| 106 |
)
|
| 107 |
|
| 108 |
iface.launch()
|
|
@@ -116,3 +118,4 @@ iface.launch()
|
|
| 116 |
|
| 117 |
|
| 118 |
|
|
|
|
|
|
| 15 |
'Spanish': "Helsinki-NLP/opus-mt-en-es"
|
| 16 |
}
|
| 17 |
|
| 18 |
+
# Summarization model identifiers, keyed by the "Summarization Model" choice in the UI (names only; no pipeline is created here)
|
| 19 |
+
summarization_models = {
|
| 20 |
+
'Scientific': "allenai/scibert_scivocab_cased",
|
| 21 |
+
'Literature': "t5-small"
|
| 22 |
+
}
|
| 23 |
|
| 24 |
# Initialize translation pipeline
|
| 25 |
def get_translator(language):
|
|
|
|
| 46 |
return chunks
|
| 47 |
|
| 48 |
# Helper function to summarize text
|
| 49 |
+
def summarize_text(text, model):
|
| 50 |
chunks = split_text(text)
|
| 51 |
summaries = []
|
| 52 |
for chunk in chunks:
|
| 53 |
try:
|
| 54 |
+
summary = summarizers[model](chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
|
| 55 |
summaries.append(summary)
|
| 56 |
except Exception as e:
|
| 57 |
print(f"Error summarizing chunk: {chunk}\nError: {e}")
|
|
|
|
| 69 |
return text
|
| 70 |
return text
|
| 71 |
|
| 72 |
+
def process_text(input_text, model, language):
|
| 73 |
print(f"Input text: {input_text[:500]}...") # Show only the first 500 characters for brevity
|
| 74 |
+
summary = summarize_text(input_text, model)
|
| 75 |
print(f"Summary: {summary[:500]}...") # Show only the first 500 characters for brevity
|
| 76 |
bullet_points = generate_bullet_points(summary)
|
| 77 |
print(f"Bullet Points: {bullet_points}")
|
|
|
|
| 96 |
fn=process_text,
|
| 97 |
inputs=[
|
| 98 |
gr.Textbox(label="Input Text", placeholder="Paste your text here...", lines=10),
|
| 99 |
+
gr.Radio(choices=["Scientific", "Literature"], label="Summarization Model"),
|
| 100 |
gr.Dropdown(choices=["Vietnamese", "Japanese", "Thai", "Spanish"], label="Translate to", value="Vietnamese")
|
| 101 |
],
|
| 102 |
outputs=[
|
|
|
|
| 104 |
gr.Textbox(label="Translated Bullet Points", lines=10)
|
| 105 |
],
|
| 106 |
title="Text to Bullet Points and Translation",
|
| 107 |
+
description="Paste any text, choose the summarization model, and optionally translate the bullet points into Vietnamese, Japanese, Thai, or Spanish."
|
| 108 |
)
|
| 109 |
|
| 110 |
iface.launch()
|
|
|
|
| 118 |
|
| 119 |
|
| 120 |
|
| 121 |
+
|