# import streamlit as st
# from transformers import pipeline
# # pipe = pipeline('sentiment-analysis')
# text = st.text_area('enter text: ')
# generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")
# technical_text = """
# The CRISPR-Cas9 system enables precise genome editing by creating double-strand breaks at specific DNA locations, facilitating targeted genetic modifications.
# """
# # Prompt para transformación
# prompt = f"Rewrite the following technical text in simple terms for a general audience:\n\n{text}\n\nSimplified version:"
# # Generar texto transformado
# result = generator(
# prompt,
# max_length=256,
# num_return_sequences=1,
# do_sample=True,
# temperature=0.1,
# top_p=0.9,
# repetition_penalty=1.1,
# )
# print(result[0]['generated_text'])
# if text:
# out = pipe(text)
# st.json(out)
from transformers import pipeline
import json
# Step 1: Rewrite the technical text in more accessible language.
# The "t5-small" checkpoint is loaded behind the "summarization" task.
# NOTE(review): this is a summarizer rather than a dedicated simplification
# model, and the example input further down is Spanish -- confirm that
# t5-small produces usable output for it.
simplifier = pipeline(task="summarization", model="t5-small")
def simplify_text(text):
    """Return the condensed version of *text* produced by ``simplifier``.

    The module-level pipeline is invoked without sampling
    (``do_sample=False``) and with ``min_length=50`` / ``max_length=100``;
    the ``summary_text`` field of its first result is returned.
    """
    generation_options = {
        "max_length": 100,
        "min_length": 50,
        "do_sample": False,
    }
    outputs = simplifier(text, **generation_options)
    return outputs[0]["summary_text"]
# Step 2: Translation from Spanish into English, Arabic and French.
# One pipeline per target language, each backed by the matching
# Helsinki-NLP "opus-mt-es-*" checkpoint.
translator_en = pipeline(
    task="translation_es_to_en",
    model="Helsinki-NLP/opus-mt-es-en",
)
translator_ar = pipeline(
    task="translation_es_to_ar",
    model="Helsinki-NLP/opus-mt-es-ar",
)
translator_fr = pipeline(
    task="translation_es_to_fr",
    model="Helsinki-NLP/opus-mt-es-fr",
)
def translate_text(text):
    """Translate *text* with each of the module-level translation pipelines.

    Returns a dict keyed by ``"english"``, ``"arabic"`` and ``"french"`` (in
    that order) whose values are the ``translation_text`` of the first result
    from ``translator_en``, ``translator_ar`` and ``translator_fr``.
    """
    translators = (
        ("english", translator_en),
        ("arabic", translator_ar),
        ("french", translator_fr),
    )
    return {
        language: translate(text)[0]["translation_text"]
        for language, translate in translators
    }
# Step 3: Identify the main topic with zero-shot classification.
# The zero-shot pipeline scores every candidate label through the model's
# "entailment" output, so it needs a checkpoint trained for natural language
# inference (NLI). The SST-2 sentiment DistilBERT checkpoint used previously
# only has NEGATIVE/POSITIVE labels, which makes the topic scores meaningless.
# A multilingual NLI checkpoint is used because the text handled by this
# script is Spanish while the candidate labels are English.
classifier = pipeline(
    "zero-shot-classification",
    model="MoritzLaurer/mDeBERTa-v3-base-mnli-xnli",
)
# Candidate topics offered to the classifier.
labels = ["Technology", "Science", "Health", "Business", "Education", "Other"]
def identify_topic(text):
    """Return the main topic of *text*.

    ``classifier`` is called with the module-level ``labels`` as candidate
    labels, and the first entry of the ``labels`` list in its result is
    returned.
    """
    ranked_labels = classifier(text, candidate_labels=labels)["labels"]
    return ranked_labels[0]
# Step 4: Detect the tone of the text with a RoBERTa-family sentiment model.
# "roberta-base" is only a pretrained language model: loading it for
# sentiment analysis attaches a randomly initialised classification head, so
# it returns arbitrary LABEL_0 / LABEL_1 predictions. Use a checkpoint that
# was actually fine-tuned for sentiment; a multilingual one is chosen because
# the text handled by this script is Spanish.
# NOTE(review): this checkpoint is expected to emit negative / neutral /
# positive style labels -- confirm the exact label strings on the model card.
tone_analyzer = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
)
def detect_tone(text):
    """Return the label of the first prediction ``tone_analyzer`` gives for *text*.

    The set of possible labels is whatever the checkpoint loaded into
    ``tone_analyzer`` defines.
    """
    predictions = tone_analyzer(text)
    top_prediction = predictions[0]
    return top_prediction["label"]
# Step 5: Bundle every intermediate result into one JSON document.
def process_text_for_web_service(text):
    """Run the full pipeline on *text* and return the result as a JSON string.

    The text is first simplified; the simplified text is then translated,
    classified by topic and analysed for tone. The JSON object contains the
    keys ``original_text``, ``simplified_text``, ``translations``,
    ``main_topic`` and ``tone``, serialised with a 4-space indent and with
    non-ASCII characters left unescaped.
    """
    simplified = simplify_text(text)
    # Values are evaluated top to bottom, so the helpers still run in the
    # order translate -> topic -> tone.
    payload = {
        "original_text": text,
        "simplified_text": simplified,
        "translations": translate_text(simplified),
        "main_topic": identify_topic(simplified),
        "tone": detect_tone(simplified),
    }
    return json.dumps(payload, indent=4, ensure_ascii=False)
# Example input text (in Spanish); adjacent literals are joined by the parser
# into the same single string as before.
input_text = (
    "La inteligencia artificial (IA) está revolucionando la industria de la "
    "tecnología al permitir nuevas aplicaciones en múltiples campos, desde la "
    "salud hasta la educación."
)

# Run the whole pipeline on the example and print the resulting JSON.
formatted_output = process_text_for_web_service(text=input_text)
print(formatted_output)