Spaces:

tinchex37
/

demo

Build error

File size: 3,425 Bytes

# import streamlit as st
# from transformers import pipeline

# # pipe = pipeline('sentiment-analysis')
# text = st.text_area('enter text: ')

# generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")


# technical_text = """
# The CRISPR-Cas9 system enables precise genome editing by creating double-strand breaks at specific DNA locations, facilitating targeted genetic modifications.
# """

# # Prompt for the transformation
# prompt = f"Rewrite the following technical text in simple terms for a general audience:\n\n{text}\n\nSimplified version:"

# # Generate the transformed text
# result = generator(
#     prompt,
#     max_length=256,
#     num_return_sequences=1,
#     do_sample=True,
#     temperature=0.1,
#     top_p=0.9,
#     repetition_penalty=1.1,
# )

# print(result[0]['generated_text'])


# if text:
#     out = pipe(text)
#     st.json(out)




from transformers import pipeline
import json

# Step 1: Summarization pipeline (T5 small checkpoint) used to rewrite
# technical text in more accessible language.
# NOTE(review): t5-small is presumably an English-centric checkpoint, while the
# example input in this file is Spanish — confirm the model suits the input.
simplifier = pipeline(task="summarization", model="t5-small")

def simplify_text(text):
    """Return the summarizer's rewrite of *text*.

    The summarization pipeline is called with sampling disabled and with
    ``min_length=50`` / ``max_length=100`` as output length bounds; the
    ``summary_text`` field of the first result is returned.
    """
    summaries = simplifier(
        text,
        min_length=50,
        max_length=100,
        do_sample=False,
    )
    first_summary = summaries[0]
    return first_summary["summary_text"]

# Step 2: MarianMT translation pipelines, one per target language.
# Every pipeline translates from Spanish (es) into the language in its name.
translator_en = pipeline(
    task="translation_es_to_en",
    model="Helsinki-NLP/opus-mt-es-en",
)
translator_ar = pipeline(
    task="translation_es_to_ar",
    model="Helsinki-NLP/opus-mt-es-ar",
)
translator_fr = pipeline(
    task="translation_es_to_fr",
    model="Helsinki-NLP/opus-mt-es-fr",
)

def translate_text(text):
    """Translate *text* with each of the three translation pipelines.

    Returns:
        A dict with the keys ``"english"``, ``"arabic"`` and ``"french"``,
        each mapped to the ``translation_text`` of the first result returned
        by the corresponding pipeline.
    """
    # Pair each output key with its pipeline; the order matches the key order
    # of the returned dict.
    targets = (
        ("english", translator_en),
        ("arabic", translator_ar),
        ("french", translator_fr),
    )
    return {
        language: translate(text)[0]["translation_text"]
        for language, translate in targets
    }

# Step 3: Identify the main topic with zero-shot classification (DistilBERT).
# The zero-shot pipeline scores every candidate label as an NLI hypothesis, so
# the checkpoint must be fine-tuned on NLI and expose an "entailment" class.
# A sentiment checkpoint (e.g. an SST-2 model with only POSITIVE/NEGATIVE
# labels) has no such class and cannot rank topics meaningfully, hence the
# MNLI-tuned DistilBERT model here.
# NOTE(review): this checkpoint is English/uncased — confirm it is adequate
# for the language of the text being classified, or swap in a multilingual
# NLI checkpoint.
classifier = pipeline(
    "zero-shot-classification",
    model="typeform/distilbert-base-uncased-mnli",
)

# Candidate topics offered to the classifier.
labels = ["Technology", "Science", "Health", "Business", "Education", "Other"]

def identify_topic(text):
    """Return the main topic of *text*.

    Runs the zero-shot classifier against the module-level ``labels`` list and
    returns the first entry of the result's ``"labels"`` field, which is
    treated as the main topic.
    """
    ranked_labels = classifier(text, candidate_labels=labels)["labels"]
    top_label = ranked_labels[0]
    return top_label

# Step 4: Detect the tone of the text using RoBERTa.
# The sentiment pipeline needs a checkpoint whose classification head has been
# fine-tuned; a bare pretrained encoder such as "roberta-base" gets a randomly
# initialised head and only emits placeholder LABEL_0/LABEL_1 outputs. This
# checkpoint is a RoBERTa model fine-tuned for sentiment with
# negative/neutral/positive labels.
# NOTE(review): the checkpoint is presumably English-centric — confirm it is
# adequate for the language of the analysed text, or use a multilingual
# sentiment model.
tone_analyzer = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
)

def detect_tone(text):
    """Return the label of the tone analyzer's first prediction for *text*.

    The label gives a general idea of the tone; the exact label names depend
    on the model configured for ``tone_analyzer``.
    """
    predictions = tone_analyzer(text)
    return predictions[0]["label"]

# Step 5: Run every stage and format the results for a web service.
def process_text_for_web_service(text):
    """Run the whole pipeline on *text* and return the result as a JSON string.

    The text is simplified first; translation, topic identification and tone
    detection are then applied to the simplified text rather than the original.

    Returns:
        A JSON document (4-space indent, non-ASCII characters kept as-is) with
        the keys ``original_text``, ``simplified_text``, ``translations``,
        ``main_topic`` and ``tone``.
    """
    simplified = simplify_text(text)

    # Dict values are evaluated top to bottom, so the stages run in the order
    # translate -> topic -> tone.
    payload = {
        "original_text": text,
        "simplified_text": simplified,
        "translations": translate_text(simplified),
        "main_topic": identify_topic(simplified),
        "tone": detect_tone(simplified),
    }

    return json.dumps(payload, indent=4, ensure_ascii=False)

# Sample Spanish input used to exercise the pipeline end to end.
input_text = (
    "La inteligencia artificial (IA) está revolucionando la industria de la tecnología al permitir nuevas aplicaciones en múltiples campos, desde la salud hasta la educación."
)

# Process the sample, then write the resulting JSON document to stdout.
formatted_output = process_text_for_web_service(input_text)
print(formatted_output)