import streamlit as st
import cv2
import numpy as np
from PIL import Image
import pytesseract
from transformers import TrOCRProcessor, VisionEncoderDecoderModel, pipeline
import re

# Streamlit re-executes this script from top to bottom on every user
# interaction, so the model loaders are wrapped in st.cache_resource: the
# weights are loaded once and the same objects are reused on later reruns.
# NOTE(review): cached resources are shared between sessions; the models are
# only used for inference here, but confirm that is acceptable for deployment.
@st.cache_resource(show_spinner="Loading handwriting recognition model...")
def _load_trocr():
    """Load and return the TrOCR (processor, model) pair for handwriting recognition."""
    checkpoint = "microsoft/trocr-base-handwritten"
    return (
        TrOCRProcessor.from_pretrained(checkpoint),
        VisionEncoderDecoderModel.from_pretrained(checkpoint),
    )


@st.cache_resource(show_spinner="Loading question-answering model...")
def _load_qa_pipeline():
    """Load and return the pre-trained question-answering pipeline used for grading."""
    return pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")


# Load TrOCR model for handwriting recognition
processor, model = _load_trocr()

# Load pre-trained QA model for grading
qa_pipeline = _load_qa_pipeline()

# Function to preprocess the image
def preprocess_image(image_file):
    """Binarize an image so the text stands out for OCR.

    The image is opened with PIL, converted to grayscale, smoothed with a
    5x5 Gaussian blur, and thresholded with Otsu's method.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts (path or file-like).

    Returns:
        A PIL image holding the black-and-white result expanded back to
        three RGB channels.
    """
    rgb_pixels = np.array(Image.open(image_file).convert("RGB"))
    grayscale = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    _, binary = cv2.threshold(
        smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    binary_rgb = cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB)
    return Image.fromarray(binary_rgb)

# Function to extract text using Tesseract OCR
def extract_text_with_tesseract(image):
    """Run Tesseract OCR on *image* and return the recognized text."""
    recognized_text = pytesseract.image_to_string(image)
    return recognized_text

# Function to extract text using TrOCR
def extract_text_with_trocr(image):
    """Recognize handwriting in *image* with the module-level TrOCR model.

    The image is encoded by ``processor``, token ids are generated by
    ``model``, and the first decoded sequence is returned with special
    tokens removed.

    NOTE(review): the handwritten TrOCR checkpoint is presumably meant for
    single text-line crops -- confirm that full-page images are acceptable.
    """
    encoded = processor(images=image, return_tensors="pt")
    token_ids = model.generate(encoded.pixel_values)
    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]

# Extract student name and roll number
# The name capture is restricted to the label's own line (spaces and tabs
# only, no newlines); otherwise it would swallow the text of following lines.
_STUDENT_NAME_PATTERN = re.compile(r"NAME\s*=[ \t]*(\w[\w \t]*)", re.IGNORECASE)
_ROLL_NUMBER_PATTERN = re.compile(r"Roll\s*NO\s*=\s*(\d+)", re.IGNORECASE)


def extract_student_info(text):
    """Pull the student's name and roll number out of OCR text.

    Looks for ``NAME = <name>`` and ``Roll NO = <digits>`` labels,
    case-insensitively. The name must start on the same line as its label
    and ends at the end of that line.

    Args:
        text: The text extracted from the answer sheet.

    Returns:
        A ``(student_name, roll_number)`` tuple of strings; either element
        is ``"Unknown"`` when the corresponding label is not found.
    """
    name_match = _STUDENT_NAME_PATTERN.search(text)
    roll_match = _ROLL_NUMBER_PATTERN.search(text)
    student_name = name_match.group(1).strip() if name_match else "Unknown"
    roll_number = roll_match.group(1).strip() if roll_match else "Unknown"
    return student_name, roll_number

# Extract questions from the text
def extract_questions_from_text(text):
    """Return every question found in *text*, in order of appearance.

    A question is the part of a line from its start up to and including
    the last ``?`` on that line; lines without a ``?`` contribute nothing.
    Leading whitespace is kept as-is.
    """
    question_pattern = re.compile(r'(?:[^\n]*\?)')
    return [found.group(0) for found in question_pattern.finditer(text)]

# Grade answers
def grade_answer(question, context):
    """Grade *question* against *context* with the module-level QA pipeline.

    Returns:
        A ``(score, feedback)`` tuple: the pipeline's ``'score'`` value and
        ``"Correct"`` when that score is above 0.5, otherwise ``"Incorrect"``.
    """
    confidence = qa_pipeline(question=question, context=context)['score']
    if confidence > 0.5:
        verdict = "Correct"
    else:
        verdict = "Incorrect"
    return confidence, verdict

# Streamlit App
st.title("Handwritten Answer Sheet Grading System")
st.write("Upload an image or handwritten file to process.")

# Tesseract output with this many characters or fewer (after stripping
# whitespace) is treated as a failed read, and TrOCR is used instead.
MIN_TESSERACT_CHARS = 10

# Upload image
uploaded_image = st.file_uploader("Upload Handwritten Image", type=["png", "jpg", "jpeg"])

if uploaded_image:
    st.image(uploaded_image, caption="Original Image", use_container_width=True)

    # Preprocess the image
    preprocessed_image = preprocess_image(uploaded_image)
    st.image(preprocessed_image, caption="Preprocessed Image", use_container_width=True)

    # Attempt text extraction with Tesseract first; fall back to TrOCR only
    # when Tesseract returns too little text.
    st.subheader("Extracted Text:")
    tesseract_text = extract_text_with_tesseract(preprocessed_image)
    if len(tesseract_text.strip()) > MIN_TESSERACT_CHARS:
        extracted_text = tesseract_text  # Use Tesseract output if it seems valid
    else:
        extracted_text = extract_text_with_trocr(preprocessed_image)  # Use TrOCR fallback

    st.text(extracted_text)

    # Extract student info
    student_name, roll_number = extract_student_info(extracted_text)
    st.subheader(f"Student Name: {student_name}")
    st.subheader(f"Roll No: {roll_number}")

    # Extract questions
    questions = extract_questions_from_text(extracted_text)
    st.subheader("Extracted Questions")
    if not questions:
        # Without this message the section would render as a bare header.
        st.info("No questions were found in the extracted text.")
    for number, question in enumerate(questions, start=1):
        st.write(f"Q{number}: {question}")

    # Grade the answers; the whole extracted text is passed as the context
    # for every question.
    st.subheader("Grading Results")
    if not questions:
        st.info("Nothing to grade.")
    for question in questions:
        score, feedback = grade_answer(question, extracted_text)
        st.write(f"**Question:** {question}")
        st.write(f"**Score:** {score:.2f}")
        st.write(f"**Feedback:** {feedback}")
        st.write("---")