Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from langchain_community.llms import OpenAI | |
| from langchain.prompts import PromptTemplate | |
| import os | |
| from dotenv import load_dotenv | |
| from langchain_huggingface import HuggingFaceEndpoint | |
| load_dotenv() | |
# Prompt template for the simplification task (FALC / "Leichte Sprache").
# ``{text}`` is the only placeholder; translate_text fills it with the
# document content when it invokes the prompt | llm chain. The wording is
# runtime text sent to the model, so it is kept exactly as authored.
system_prompt_1 = (
    "\n"
    "You are an advanced AI assistant tasked with helping to transcribes given texts into\n"
    'simplified languages, specifically FALC (Facile à Lire et à Comprendre) and "Leichte Sprache" (Simple Language).\n'
    "This system is intended to streamline the creation of accessible content for government websites.\n"
    "Instructions for AI Development:\n"
    "detect the language of text given then transcribes text into the same language which the guidelines of\n"
    'FALC (Facile à Lire et à Comprendre) and "Leichte Sprache" (Simple Language) and\n'
    "accurately transcribe complex texts into simplified language.\n"
    "Ensure maintaining the context and meaning of the original text while simplifying its language.\n"
    "text: {text}\n"
    "transcribes text: "
)
def _read_uploaded_text(path, extension):
    """Return the plain-text content of the .txt or .rtf file at *path*.

    Args:
        path: Filesystem path of the uploaded file.
        extension: Lower-cased file extension including the dot.

    Raises:
        ValueError: If *extension* is neither ``.txt`` nor ``.rtf``.
    """
    if extension == ".txt":
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()
    if extension == ".rtf":
        # Imported lazily so .txt uploads work even without the RTF package.
        from pyth.plugins.plaintext.writer import PlaintextWriter
        from pyth.plugins.rtf15.reader import Rtf15Reader

        # The reader needs an open file object; the original code passed a
        # handle that was never opened in this branch.
        with open(path, "rb") as handle:
            document = Rtf15Reader.read(handle)
        return PlaintextWriter.write(document).getvalue()
    raise ValueError("Unsupported file type. Please upload a .txt or .rtf file.")


def translate_text(file, text_input=None):
    """Rewrite a document in simplified language and save the result to disk.

    Args:
        file: The upload to process. Either an object exposing the path as
            ``.name`` or a plain path string is accepted. May be ``None``
            when ``text_input`` is supplied instead.
        text_input: Optional raw text, used only when no file was uploaded.
            It defaults to ``None`` so the function can be called with the
            single file input wired up in the Gradio interface.

    Returns:
        A ``(transcribed_text, output_file_path)`` tuple. The output file is
        ``translated_file.txt`` or ``translated_file.rtf`` in the current
        working directory, matching the input type.

    Raises:
        ValueError: If nothing was provided or the file type is unsupported.
        KeyError: If the ``api_token`` environment variable is not set.
    """
    # Resolve and validate the input first so a bad upload is rejected
    # without needing the API token or constructing the endpoint.
    if file is None:
        if not text_input:
            raise ValueError("No input provided. Please upload a .txt or .rtf file.")
        source_text = text_input
        extension = ".txt"
    else:
        source_path = getattr(file, "name", file)
        extension = os.path.splitext(str(source_path))[1].lower()
        source_text = _read_uploaded_text(source_path, extension)

    # NOTE(review): max_length=128 is passed through unchanged from the
    # original; confirm it is the intended generation-length setting.
    llm = HuggingFaceEndpoint(
        repo_id="mistralai/Mistral-7B-Instruct-v0.2",
        max_length=128,
        temperature=0.5,
        huggingfacehub_api_token=os.environ["api_token"],
    )

    # Fill the prompt's {text} placeholder and run it through the model.
    prompt = PromptTemplate.from_template(system_prompt_1)
    llm_chain = prompt | llm
    file_translation = llm_chain.invoke({"text": source_text})

    # NOTE: for .rtf input the model output is written as-is, i.e. plain
    # text under an .rtf name (same as the original behaviour).
    output_file_path = "translated_file" + extension
    with open(output_file_path, "w", encoding="utf-8") as handle:
        handle.write(file_translation)

    return file_translation, output_file_path
# Assemble the web UI: a single file upload goes in; the transcribed text
# and a downloadable result file come out.
upload_input = gr.File(label="Upload Text File")
transcript_output = gr.Textbox(label="Transcribed Content")
download_output = gr.File(label="Download Translated File")

iface = gr.Interface(
    fn=translate_text,
    inputs=[upload_input],
    outputs=[transcript_output, download_output],
    title="Text Transcriber",
    description=(
        "Upload a .txt or .rtf file to translate its content using LangChain and "
        "Mistral-7B-Instruct-v0.2 model with predefined system prompts."
    ),
    allow_flagging="never",
)

# Start the app as soon as the script is executed.
iface.launch(debug=True)