Spaces:

deepakaiplanet
/

gov-tech-lab

Sleeping

App Files Files Community

gov-tech-lab / app.py

deepakaiplanet

Update app.py

88120b9 verified over 1 year ago

raw

history blame contribute delete

2.96 kB

	import gradio as gr
	from langchain_community.llms import OpenAI
	from langchain.prompts import PromptTemplate
	import os
	from dotenv import load_dotenv
	from langchain_huggingface import HuggingFaceEndpoint

	load_dotenv()


	# Prompt template sent to the model by translate_text(), which passes it to
	# PromptTemplate.from_template(). It asks the model to rewrite the input in
	# simplified language (FALC / "Leichte Sprache") in the same language as the
	# input. `{text}` is the only placeholder and receives the file's content.
	# NOTE: this is a runtime string; every character (including leading
	# whitespace on each line) is part of what the model receives.
	system_prompt_1 = """
	You are an advanced AI assistant tasked with helping to transcribes given texts into
	simplified languages, specifically FALC (Facile à Lire et à Comprendre) and "Leichte Sprache" (Simple Language).
	This system is intended to streamline the creation of accessible content for government websites.

	Instructions for AI Development:

	detect the language of text given then transcribes text into the same language which the guidelines of
	FALC (Facile à Lire et à Comprendre) and "Leichte Sprache" (Simple Language) and
	accurately transcribe complex texts into simplified language.

	Ensure maintaining the context and meaning of the original text while simplifying its language.

	text: {text}

	transcribes text: """

	def translate_text(file, text_input=None):
	    """Simplify the text of an uploaded .txt or .rtf file with the LLM.

	    The file content is inserted into ``system_prompt_1`` and sent to the
	    ``mistralai/Mistral-7B-Instruct-v0.2`` Hugging Face endpoint. The model's
	    answer is returned and also written to ``translated_file.txt`` or
	    ``translated_file.rtf`` in the current working directory.

	    Args:
	        file: The uploaded file, either an object exposing its path as
	            ``.name`` or the path itself.
	        text_input: Unused. It is optional so that an interface with a
	            single file input can call this function with one argument.

	    Returns:
	        A ``(text, path)`` tuple: the model output and the path of the file
	        it was written to.

	    Raises:
	        ValueError: If no file was given or its extension is not
	            ``.txt`` / ``.rtf``.
	        KeyError: If the ``api_token`` environment variable is not set.
	    """
	    if file is None:
	        raise ValueError("No file provided. Please upload a .txt or .rtf file.")

	    # Accept both upload objects (path in ``.name``) and plain path values.
	    source_path = getattr(file, "name", file)
	    lowered_path = str(source_path).lower()

	    # Read the input first so unsupported uploads fail before any endpoint
	    # is created. The extension check is case-insensitive.
	    if lowered_path.endswith('.txt'):
	        with open(source_path, 'r', encoding='utf-8') as f:
	            file_text = f.read()
	        output_file_path = "translated_file.txt"
	    elif lowered_path.endswith('.rtf'):
	        # Imported lazily so .txt uploads work without pyth installed.
	        from pyth.plugins.rtf15.reader import Rtf15Reader
	        from pyth.plugins.plaintext.writer import PlaintextWriter

	        # The reader needs an open file object; the handle is closed as soon
	        # as the document has been parsed.
	        with open(source_path, 'rb') as f:
	            doc = Rtf15Reader.read(f)
	        file_text = PlaintextWriter.write(doc).getvalue()
	        output_file_path = "translated_file.rtf"
	    else:
	        raise ValueError("Unsupported file type. Please upload a .txt or .rtf file.")

	    repo_id = "mistralai/Mistral-7B-Instruct-v0.2"

	    # Initialize the HuggingFace endpoint.
	    # NOTE(review): confirm that `max_length` is honored by
	    # HuggingFaceEndpoint; its documented length limit is `max_new_tokens`.
	    llm = HuggingFaceEndpoint(
	        repo_id=repo_id,
	        max_length=128,
	        temperature=0.5,
	        huggingfacehub_api_token=os.environ["api_token"],
	    )

	    # Define the prompt template and pipe it into the LLM.
	    prompt = PromptTemplate.from_template(system_prompt_1)
	    llm_chain = prompt | llm
	    file_translation = llm_chain.invoke({"text": file_text})

	    # Write the model output to the output file.
	    # NOTE(review): for .rtf uploads the file gets the model's plain-text
	    # answer, not RTF markup, although it keeps the .rtf extension.
	    with open(output_file_path, 'w', encoding='utf-8') as f:
	        f.write(file_translation)

	    return file_translation, output_file_path

	# Assemble the web UI: a single upload widget feeds translate_text, whose two
	# return values are shown as text and offered as a downloadable file.
	_upload_widget = gr.File(label="Upload Text File")
	_result_widgets = [
	    gr.Textbox(label="Transcribed Content"),
	    gr.File(label="Download Translated File"),
	]

	iface = gr.Interface(
	    fn=translate_text,
	    inputs=[_upload_widget],
	    outputs=_result_widgets,
	    title="Text Transcriber",
	    description="Upload a .txt or .rtf file to translate its content using LangChain and Mistral-7B-Instruct-v0.2 model with predefined system prompts.",
	    allow_flagging="never",
	)

	# Start the app as soon as the script runs, with debug output enabled.
	iface.launch(debug=True)