OcrWAiCorrection / gemini_correction.py
Vlad Bastina
model
ccc03f8
raw
history blame contribute delete
994 Bytes
import google.generativeai as genai
import streamlit as st
class AiCorrection:
    """Correct OCR-extracted text by sending it to a Gemini generative model.

    The Google API key is read from Streamlit secrets under the key
    ``GOOGLE_API_KEY``; the model is created once per instance and reused
    for every ``correct_output`` call.
    """

    # Model used when the caller does not request a specific one.
    DEFAULT_MODEL_NAME = 'gemini-2.5-flash'

    # System prompt attached to the model: it restricts the model to pure
    # OCR correction and to returning the corrected text only.
    SYSTEM_INSTRUCTIONS = (
        'Role: You are an AI that corrects text extracted by OCR, ensuring it matches the original document.\n'
        'Input: Raw text output from OCR, which may contain errors such as misspellings, incorrect formatting, or missing characters.\n'
        'Task:\n'
        '- Correct all OCR errors to accurately reflect the original document.\n'
        '- Preserve the original formatting, punctuation, and special characters.\n'
        '- Do not add, remove, or alter any content beyond necessary corrections.\n'
        'Output: Only the corrected text, with no explanations or additional comments.'
    )

    def __init__(self, model_name: str = DEFAULT_MODEL_NAME) -> None:
        """Configure the Gemini client and build the correction model.

        Args:
            model_name: Name of the Gemini model to use. Defaults to
                ``DEFAULT_MODEL_NAME`` so existing ``AiCorrection()`` calls
                behave as before.

        Raises:
            KeyError: If ``GOOGLE_API_KEY`` is missing from ``st.secrets``
                (presumably; the exact exception type is decided by
                Streamlit -- confirm against its documentation).
        """
        genai.configure(api_key=st.secrets['GOOGLE_API_KEY'])
        self.model = genai.GenerativeModel(
            model_name=model_name,
            system_instruction=self.SYSTEM_INSTRUCTIONS,
        )

    def correct_output(self, text: str) -> str:
        """Return the model's corrected version of OCR output ``text``.

        Args:
            text: Raw text produced by OCR.

        Returns:
            The ``text`` attribute of the model response. Empty or
            whitespace-only input is returned unchanged (``None`` becomes
            ``''``) without calling the model.

        Note:
            Errors raised by the Gemini client, including when reading
            ``response.text``, are not caught here and propagate to the
            caller.
        """
        # Nothing to correct: skip the network round trip. NOTE(review): the
        # SDK is expected to reject empty content -- confirm against its docs.
        if not text or not text.strip():
            return text or ''
        response = self.model.generate_content(text)
        return response.text