Spaces:
Sleeping
Sleeping
| # Reference: | |
| # https://huggingface.co/spaces/Sagar23p/mistralAI_chatBoat | |
| import gradio as gr | |
| from paddleocr import PaddleOCR, draw_ocr | |
| import asyncio | |
| import requests | |
| from huggingface_hub import InferenceClient | |
| import os | |
# Hugging Face access token, read from the environment so it never lives in the source.
API_TOKEN = os.environ.get('HUGGINGFACE_API_KEY')
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
# Only build the Authorization header when a token is configured: os.environ.get
# returns None for a missing variable, and "Bearer " + None raises TypeError at
# import time. Without a token the requests go out unauthenticated.
# NOTE(review): the inference API may reject unauthenticated calls for this model — confirm.
headers = {"Authorization": "Bearer " + API_TOKEN} if API_TOKEN else {}
def query(question):
    """Send *question* to the chat model and return the text of its answer.

    The question is sent together with a fixed system prompt. When the first
    answer contains the substring 'Yes', that answer is appended to the
    conversation along with a follow-up asking what the mistake is and what
    the correct sentence would be, and the text of the second answer is
    returned instead.

    Args:
        question: The user prompt to send.

    Returns:
        The 'content' of the last assistant message received.
    """
    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
    client = InferenceClient(model_id, headers=headers)

    conversation = [
        {
            "role": "system",
            "content": "You are a helpful and honest assistant. Please, respond concisely and truthfully.",
        },
        {
            "role": "user",
            "content": question,
        },
    ]

    reply = client.chat_completion(conversation, model=model_id, max_tokens=1000)
    first_answer = reply.choices[0].message

    # No 'Yes' anywhere in the answer: return the first answer unchanged.
    if first_answer['content'].find('Yes') < 0:
        return first_answer['content']

    # The model reported a mistake: keep its answer in the history and ask for details.
    conversation.append(first_answer)
    conversation.append(
        {"role": "user",
         "content": "What is the mistake and what is the correct sentence?"}
    )
    reply = client.chat_completion(conversation, model=model_id, max_tokens=1000)
    return reply.choices[0].message['content']
def _clean_ocr_line(raw, langChoice):
    """Strip known noise from one recognized text fragment and return the rest."""
    import re

    if langChoice == "ch":
        # remove pinyin if it's Chinese
        cleaned = re.sub(r'[a-z]', '', raw)
        # drop "1."-style numbering
        cleaned = re.sub(r'[0-9]\.', '', cleaned)
        cleaned = cleaned.replace(" ", "")
        # Applied in the original order; the repeated passes also collapse
        # nested empty brackets.
        # NOTE(review): the repeated "()" entries were presumably different
        # (e.g. full-width) bracket variants before this text was normalized — verify.
        for empty_brackets in ("()", "()", "( )", "()"):
            cleaned = cleaned.replace(empty_brackets, "")
        return cleaned

    # English: remove worksheet header/footer fragments.
    cleaned = re.sub(r'Term [0-9] Spelling', '', raw)
    return re.sub(r'Page [0-9]', '', cleaned)


def image2Text(image:str, langChoice:str):
    """Run OCR on *image* and return the recognized text, one sentence per line.

    Each recognized fragment (the string at ``line[1][0]`` of an OCR result
    entry) is cleaned according to *langChoice*; empty fragments are dropped.
    The remaining fragments are joined with spaces and a newline is inserted
    after every '.'.

    Args:
        image: The image to recognize, passed straight through to
            ``PaddleOCR.ocr``. ``None`` (nothing uploaded) yields "".
        langChoice: Language code given to PaddleOCR; "ch" selects the
            Chinese clean-up rules, anything else the English ones.

    Returns:
        The cleaned text, or "" when there is no image or nothing was recognized.
    """
    if image is None:
        # Nothing uploaded: skip loading the model instead of failing inside OCR.
        return ""

    ocr = PaddleOCR(use_angle_cls=True, lang=langChoice) # need to run only once to download and load model into memory
    result = ocr.ocr(image, cls=True)

    kept = []
    for page in result or []:
        # A page entry can be None/empty when no text was detected (see the
        # PaddleOCR docs); iterating it would raise TypeError.
        if not page:
            continue
        for line in page:
            if langChoice != "ch":
                print(line)
            cleaned = _clean_ocr_line(line[1][0], langChoice)
            if cleaned != "":
                kept.append(cleaned + "\n")

    text = "".join(kept)
    # Re-flow: merge the OCR line breaks, then break after each full stop.
    return text.replace("\n", " ").replace(".", ".\n")
def text2PrevMistake(recognized_text, langChoice:str, current_line, session_data):
    """Rewind the stored line index and rerun the forward mistake search.

    ``session_data`` is either empty or a one-element list holding the index
    of the current line. The index is moved back by two, because
    ``text2NextMistake`` advances by one before checking, so the search
    resumes at the line just before the current one. When there is no stored
    index, or it is 0 or 1, the state is reset so the search starts from the
    first line.

    Returns:
        Whatever ``text2NextMistake`` returns for the rewound state.
    """
    can_step_back = len(session_data) != 0 and session_data[0] not in (0, 1)
    rewound = [session_data[0] - 2] if can_step_back else []
    return text2NextMistake(recognized_text, langChoice, current_line, rewound)
def text2NextMistake(recognized_text, langChoice:str, current_line, session_data):
    """Find the next line, after the stored position, that the model flags.

    The text is split on newlines and blank (whitespace-only) lines are
    ignored, so they are never sent to the model and an empty text is
    reported as such. ``session_data`` is either empty (start from the first
    line) or a one-element list with the index of the last examined line.

    Args:
        recognized_text: The full text to check, one sentence per line.
        langChoice: Unused here; kept so the signature matches the UI wiring.
        current_line: The line currently shown; returned unchanged when no
            further line is examined.
        session_data: Position state as described above.

    Returns:
        A ``(current_line, message, session_data)`` tuple: the line last
        examined, the model's answer (or a status message), and the new
        position state (``[]`` once the end of the text is reached).
    """
    # str.split never returns an empty list ("".split("\n") == [""]), so
    # filter blanks explicitly; otherwise the empty-text case is unreachable.
    lines = [ln for ln in (recognized_text or "").split("\n") if ln.strip()]
    if not lines:
        return current_line, "No mistake. Empty text.", session_data

    start = session_data[0] + 1 if session_data else 0
    for idx in range(start, len(lines)):
        current_line = lines[idx]
        question = f"Only answer Yes or No. Is there grammatical or logical mistake in the sentence: {current_line}"
        correction_text = query(question)
        # An answer beginning with "No" means this line is fine; keep scanning.
        if not correction_text.startswith("No"):
            return current_line, correction_text, [idx]

    # Ran off the end: reset the position so the next search starts over.
    return current_line, "No more mistake. End of text", []
# Build the Gradio UI: an image/language column, a text column, and two
# navigation buttons, then wire the buttons to the functions above.
# NOTE(review): the nesting below is reconstructed from a listing that lost its
# indentation; in particular, confirm whether the button row belongs at the
# top level or inside the wide text column.
with gr.Blocks() as demo:
    gr.HTML("""<h1 align="center">Composition Corrector</h1>""")
    # Per-session position state: [] or [index of the last examined line].
    session_data = gr.State([])
    with gr.Row():
        with gr.Column(scale=1):
            upload_image = gr.Image(height=400,width=400, value = "compo.jpg")
            langChoice = gr.Radio(["en", "ch"], value="en", label="Select lanaguage: 'ch' for Chinese, 'en' for English", info="")
        with gr.Column(scale=3):
            recognized_text = gr.Textbox(show_label=False, placeholder="composition", lines=15)
            toText = gr.Button("Convert image to text")
            current_line = gr.Textbox(show_label=False, placeholder="current line", lines=1)
            correction_text = gr.Textbox(show_label=False, placeholder="corrections...", lines=15)
    with gr.Row():
        with gr.Column(scale=1):
            toPrevMistake = gr.Button("Find prev mistake", variant="primary")
        with gr.Column(scale=1):
            toNextMistake = gr.Button("Find next mistake", variant="primary")
    # OCR the uploaded image and put the result in the composition textbox.
    toText.click(
        image2Text,
        [upload_image, langChoice],
        [recognized_text],
        #show_progress=True,
    )
    # Both navigation buttons take and update the same (line, correction, state) triple.
    toNextMistake.click(text2NextMistake , [recognized_text, langChoice, current_line, session_data], [current_line, correction_text, session_data])
    toPrevMistake.click(text2PrevMistake , [recognized_text, langChoice, current_line, session_data], [current_line, correction_text, session_data])
# Enable request queuing and start the app locally, opening a browser tab.
demo.queue().launch(share=False, inbrowser=True)