Spaces:

bruce1113
/

llm

Sleeping

App Files Files Community

bruce1113 committed 27 days ago

Commit

f969350

verified ·

1 Parent(s): 7546cf2

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +77 -148

src/streamlit_app.py CHANGED Viewed

@@ -2,15 +2,16 @@ import streamlit as st
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
-# -----------------------------
 # Model config
-# -----------------------------
-MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
 @st.cache_resource
-def load_qwen_model():
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    # Use float16 on GPU if available, else float32 on CPU
     dtype = torch.float16 if torch.cuda.is_available() else torch.float32
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
@@ -20,14 +21,13 @@ def load_qwen_model():
     model.to(device)
     return tokenizer, model, device
-tokenizer, model, device = load_qwen_model()
-# -----------------------------
-# Prompt / behavior config
-# -----------------------------
 SYSTEM_PROMPT = """
 You are taking part in a research study on how people read summaries.
 You will be given the transcript of an audio clip that a participant listened to.
 Your job is to write a single dense paragraph that summarizes the audio.
@@ -40,13 +40,40 @@ Follow these rules very carefully:
 4. Do NOT mark which details are incorrect, and do NOT mention that some facts are invented.
 5. Use clear, natural language and a neutral tone.
 6. Do NOT use bullet points, headings, or lists. Only one continuous paragraph.
 """
-def generate_summary_from_transcript(transcript_text: str) -> str:
     """
-    Use Qwen to generate a dense, slightly hallucination-seeded summary
-    from the given transcript.
     """
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
         {
@@ -56,11 +83,12 @@ Here is the transcript of the audio the participant listened to:
 \"\"\"{transcript_text}\"\"\"
-Write the summary following the rules.
 """,
         },
     ]
     inputs = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
@@ -72,152 +100,53 @@ Write the summary following the rules.
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=300,
             do_sample=True,
-            temperature=0.9,
             top_p=0.95,
-            repetition_penalty=1.05,
         )
-    # Only decode the newly generated tokens after the prompt
     generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
-    summary = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
-    return summary
-# -----------------------------
-# Streamlit UI
-# -----------------------------
-st.set_page_config(page_title="LLM Study", layout="wide")
-st.title("Ask")
-# Sidebar
-with st.sidebar:
-    st.header("Instructions (Researcher)")
-    st.markdown(
-        """
-1. Have the participant listen to the audio **outside** this app (or in another tab).
-2. Paste the **transcript** of that audio into the text box.
-3. Click **Generate summary**.
-4. Show the generated paragraph to the participant for reading / annotation / whatever your protocol specifies.
-You can also upload a `.txt` file containing the transcript.
-"""
-    )
-    st.markdown("---")
-    st.caption(f"Model: `{MODEL_NAME}`")
-# -----------------------------
-# Input area
-# -----------------------------
-col_left, col_right = st.columns([2, 1])
-with col_left:
-    st.subheader("Transcript input")
-    uploaded_file = st.file_uploader(
-        "Optional: upload a .txt file with the transcript",
-        type=["txt"],
-        help="If provided, its content will be loaded into the transcript box below.",
-    )
-    # We store transcript in session_state to allow re-editing after upload
-    if "transcript_text" not in st.session_state:
-        st.session_state.transcript_text = ""
-    if uploaded_file is not None:
-        file_bytes = uploaded_file.read()
-        try:
-            st.session_state.transcript_text = file_bytes.decode("utf-8")
-        except UnicodeDecodeError:
-            st.warning("Could not decode file as UTF-8. Please check the file encoding.")
-    transcript_text = st.text_area(
-        "Transcript of the audio (paste or edit here):",
-        value=st.session_state.transcript_text,
-        height=300,
-    )
-    # Keep session_state in sync with edits
-    st.session_state.transcript_text = transcript_text
-with col_right:
-    st.subheader("Generation controls")
-    max_new_tokens = st.slider("Max new tokens", 128, 512, 300, step=32)
-    temperature = st.slider("Temperature (creativity)", 0.1, 1.5, 0.9, step=0.1)
-    top_p = st.slider("Top-p (nucleus sampling)", 0.1, 1.0, 0.95, step=0.05)
-    st.caption(
-        """
-Higher temperature / top-p generally increases variation and may strengthen or increase
-the hallucinated details. For a controlled study, you might keep these fixed.
-"""
-    )
-# Re-bind hyperparameters into the generation function without changing its signature
-def generate_summary_from_transcript_with_params(transcript_text: str) -> str:
-    messages = [
-        {"role": "system", "content": SYSTEM_PROMPT},
         {
-            "role": "user",
-            "content": f"""
-Here is the transcript of the audio the participant listened to:
-\"\"\"{transcript_text}\"\"\"
-Write the summary following the rules.
-""",
-        },
     ]
-    inputs = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        tokenize=True,
-        return_dict=True,
-        return_tensors="pt",
-    ).to(device)
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=int(max_new_tokens),
-            do_sample=True,
-            temperature=float(temperature),
-            top_p=float(top_p),
-            repetition_penalty=1.05,
-        )
-    generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
-    summary = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
-    return summary
-# -----------------------------
-# Generate button + output
-# -----------------------------
-st.markdown("---")
-generate_clicked = st.button("Generate summary")
-if generate_clicked:
-    if not transcript_text.strip():
-        st.warning("Please provide a transcript (paste text or upload a .txt file).")
-    else:
-        with st.spinner("Generating summary with Qwen2.5-3B-Instruct..."):
-            summary = generate_summary_from_transcript_with_params(transcript_text)
-        st.subheader("Model-generated summary (show this to participant):")
-        st.write(summary)
-        with st.expander("Show transcript (for researcher)"):
-            st.text(transcript_text)
-        with st.expander("Debug info (for researcher)"):
-            st.json(
-                {
-                    "max_new_tokens": max_new_tokens,
-                    "temperature": temperature,
-                    "top_p": top_p,
-                    "transcript_chars": len(transcript_text),
-                }
-            )

 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
+# ------------------------------------------------
 # Model config
+# ------------------------------------------------
+# If it's too slow, you can change this to:
+# "Qwen/Qwen2.5-0.5B-Instruct"  (much faster)
+MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
 @st.cache_resource
+def load_model():
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     dtype = torch.float16 if torch.cuda.is_available() else torch.float32
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
     model.to(device)
     return tokenizer, model, device
+tokenizer, model, device = load_model()
+# ------------------------------------------------
+# System prompt (for your user study)
+# ------------------------------------------------
 SYSTEM_PROMPT = """
 You are taking part in a research study on how people read summaries.
 You will be given the transcript of an audio clip that a participant listened to.
 Your job is to write a single dense paragraph that summarizes the audio.
 4. Do NOT mark which details are incorrect, and do NOT mention that some facts are invented.
 5. Use clear, natural language and a neutral tone.
 6. Do NOT use bullet points, headings, or lists. Only one continuous paragraph.
+The user will usually paste the transcript of the audio as their message.
+Just respond with the summary paragraph.
+"""
# Sample transcript of a news audio clip, kept verbatim (the inconsistent
# name spellings look like speech-to-text output and are left untouched).
# The opening quotes must sit on the same line as the `=`: a bare
# `transcript_text =` followed by a newline is a SyntaxError.
# NOTE(review): chat_with_qwen assigns its own local `transcript_text`, so
# nothing in the visible code reads this module-level value -- confirm it is
# still needed.
transcript_text = """
Virginia Du Fray was one of the most prominent accusers of notorious US sex offender Jeffrey Epstein.
I've been fighting that very world to this day and I won't stop fighting. The 41-year-old died by
suicide at her property north of Perth in April this year, leaving behind a significant
estate, but no valid will. Now, a legal stash is underway in Perth's Supreme Court over access to
Mr. Jafrey's estate, which is thought to be worth millions. The court has appointed an interim administrator to oversee
the estate after Ms. Jafrey's teenage sons applied to be the administrators, prompting a counter suit launched by Ms. Jup Fray's lawyer,
Carrie Lden, and her former friend and carer, Cheryl Meyers. The court today heard that their counter claim, if successful, would see M.
Jupy's aranged husband, Robert, removed from his entitlements to her estate.Once copies of those pleadings are provided to the
media uh then you will be able to establish the basis for that counter claim. WA Supreme Court registar Danielle Davies told
the court Jafrey's former husband and her young daughter should be added to the proceedings. The case is expected to resume
in the new year. Rian Shine, ABC News.
"""
+def chat_with_qwen(chat_history):
     """
+    chat_history: list of {"role": "user"/"assistant", "content": str}
+    We treat the **last user message** as the transcript text.
+    Returns the assistant's reply string (the summary paragraph).
     """
+    # 1) Get the most recent user message = transcript text
+    for msg in reversed(chat_history):
+        if msg["role"] == "user":
+            transcript_text = msg["content"]
+            break
+    # 2) Build messages: system prompt + one user turn containing the transcript
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
         {
 \"\"\"{transcript_text}\"\"\"
+Write the summary following the rules in the system prompt.
 """,
         },
     ]
+    # 3) Apply Qwen chat template and generate
     inputs = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
+            max_new_tokens=200,      # one dense paragraph
             do_sample=True,
+            temperature=0.8,
             top_p=0.95,
         )
+    # 4) Decode only the new tokens after the prompt
     generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
+    reply = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
+    return reply
# ------------------------------------------------
# Simple chat UI (original style)
# ------------------------------------------------
st.set_page_config(page_title="Simple LLM", page_icon="💬")
st.title("💬 Simple LLM")

# First run of a session: seed the history with the assistant's greeting.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": "Hi!"}]

# Replay every stored turn so the whole conversation is drawn.
for turn in st.session_state["messages"]:
    st.chat_message(turn["role"]).markdown(turn["content"])

# Chat input: the submitted text is handed to the model as the transcript.
user_input = st.chat_input("Paste transcript or ask something...")
if user_input:
    history = st.session_state["messages"]

    # Record and echo the user's turn before generating.
    history.append({"role": "user", "content": user_input})
    st.chat_message("user").markdown(user_input)

    # Generate inside the assistant bubble, showing a spinner while waiting.
    with st.chat_message("assistant"), st.spinner("Thinking..."):
        reply = chat_with_qwen(history)
        st.markdown(reply)

    # Persist the reply so it is replayed with the rest of the history.
    history.append({"role": "assistant", "content": reply})