import streamlit as st

def calculate_match_percentage(user_seq, reference_seq):
    """Compare two sequences position by position and build colored markup.

    Characters are paired with ``zip``, so only positions present in both
    sequences are compared; the percentage is always taken relative to the
    length of ``reference_seq``.

    Args:
        user_seq: The sequence to check.
        reference_seq: The sequence to compare against.

    Returns:
        A ``(match_percentage, highlighted)`` tuple. ``match_percentage`` is
        a float in the range 0-100. ``highlighted`` is an HTML fragment in
        which every compared character of ``user_seq`` is wrapped in a
        ``<span>`` colored green (#008000) on a match and red (#FF0000) on a
        mismatch. An empty ``reference_seq`` yields ``(0.0, "")``.
    """
    # Imported locally so the function does not depend on module-level imports.
    import html

    # Nothing to compare against; also avoids dividing by zero below.
    if not reference_seq:
        return 0.0, ""

    matches = 0
    spans = []
    for u, r in zip(user_seq, reference_seq):
        if u == r:
            matches += 1
            color = "#008000"  # Green for match
        else:
            color = "#FF0000"  # Red for mismatch
        # Escape the character: the fragment is meant to be rendered as raw
        # HTML, and the sequence comes from free-form user input.
        spans.append(f'<span style="color:{color}">{html.escape(u)}</span>')

    match_percentage = (matches / len(reference_seq)) * 100
    return match_percentage, "".join(spans)

def count_sequence_occurrences(sequence, substring):
    """Return how many non-overlapping times ``substring`` occurs in ``sequence``.

    The scan runs left to right and resumes just past each hit, so
    overlapping occurrences are not counted twice. An empty ``substring``
    matches at every position, including the end of ``sequence``.
    """
    # Advance by at least one character so an empty substring cannot stall
    # the scan at the same index forever.
    step = max(len(substring), 1)
    total = 0
    cursor = 0
    while True:
        hit = sequence.find(substring, cursor)
        if hit == -1:
            return total
        total += 1
        cursor = hit + step

def main():
    """Render the DNA Sequence Matcher page.

    Shows three text inputs (reference sequence, user sequence, and a
    substring to search for). When the user sequence is non-empty and has the
    same length as the reference, a "Compare" button is offered; clicking it
    displays the match percentage and the highlighted sequence. Independently
    of that, a non-empty substring has its occurrences in the reference
    sequence counted and displayed.
    """
    st.title("DNA Sequence Matcher")

    st.write("This application compares a DNA sequence you provide to a reference DNA sequence, calculates the match percentage, and finds occurrences of a specific sequence in the reference.")

    # Input fields
    reference_seq = st.text_input("Enter the reference DNA sequence:", "ACGTACGTACGT")
    user_seq = st.text_input("Enter your DNA sequence:")

    # Input for substring to search
    substring = st.text_input("Enter the DNA sequence to count its occurrences in the reference:")

    # Validate only once something has been entered; otherwise the length
    # warning would be shown for the still-empty input field.
    if user_seq:
        if len(user_seq) != len(reference_seq):
            st.warning("The length of your DNA sequence must match the length of the reference sequence.")
        elif st.button("Compare"):
            match_percentage, highlighted = calculate_match_percentage(user_seq, reference_seq)

            st.markdown(f"### Match Percentage: {match_percentage:.2f}%")
            st.markdown("### Highlighted Sequence:")
            st.markdown(highlighted, unsafe_allow_html=True)

    # Check for substring occurrences
    if substring:
        count = count_sequence_occurrences(reference_seq, substring)
        st.markdown(f"### Occurrences of '{substring}' in the reference sequence: {count}")

# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()