"""Streamlit app that labels a text as "communist" or not.

A Gemma-3 270M sequence-classification model — optionally augmented with a
PEFT/LoRA adapter — is served through a transformers pipeline behind a
simple text-area form.
"""
import streamlit as st
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from peft import PeftModel

# Set LORA_MODEL to None to run the bare base model without the adapter.
LORA_MODEL = "cyberpole/gemma-3-270m-it-peft-SFTreward_v2"
BASE_MODEL = "google/gemma-3-270m-it"

# Human-readable rendering of the classifier's two output labels.
LABEL2OUT = {
    'LABEL_0': 'NOT COMMUNIST :cry:',
    'LABEL_1': '✊🚩 :red[COMMUNIST] ☭',
}


@st.cache_resource
def _load_pipeline():
    """Build the classification pipeline once and cache it across reruns.

    Streamlit re-executes the whole script on every user interaction;
    without @st.cache_resource the tokenizer and all model weights would
    be reloaded from disk on every button press.

    Returns:
        A transformers "sentiment-analysis" pipeline wrapping the base
        model, with the LoRA adapter applied when LORA_MODEL is set.
    """
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    base_model = AutoModelForSequenceClassification.from_pretrained(
        BASE_MODEL
    )
    if LORA_MODEL is None:
        model = base_model
    else:
        model = PeftModel.from_pretrained(base_model, LORA_MODEL)
    return pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)


pipe = _load_pipeline()

st.title("Red or Not ?")

with st.form("main_form"):
    st.subheader("Enter the text you want to analyze")
    text = st.text_area('Enter text:')  # text is stored in this variable
    submitted = st.form_submit_button("Analyse")
    if submitted:
        out = pipe(text)
        if isinstance(out, list):
            # Normal pipeline output: a list of {'label': ..., 'score': ...}.
            label = out[0].get('label')
            score = out[0].get('score')
            out = f"{LABEL2OUT.get(label)} (score={score})"
        else:
            # Unexpected output shape — surface it verbatim for debugging.
            out = f"Warning: output has type {type(out)}. It contains {str(out)}"
        st.write("Sentiment of Text: ")
        st.write(out)

st.markdown("""### FAQ
* **Which architecture?** This is a transformer
* **Do we need to build a new nuclear plant for this app?** Nah, it's just a tiny SLM infering on a few CPU cores
* **How was it trained?** A classification head has been added to a pre-trained open-weight model using labelled data
* **Which data?** Political articles and speeches, mostly in French or English, sometimes in other European languages. It contains both institutional and revolutionary organisations, and avoids assigning human labels to those whose classification would be debatable.
""")