surajit2839 committed on
Commit
2015eff
·
verified ·
1 Parent(s): cdc593a

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +22 -51
src/streamlit_app.py CHANGED
@@ -2,62 +2,33 @@ import streamlit as st
2
  from transformers import pipeline
3
  from pypdf import PdfReader
4
 
5
- # --- PAGE SETUP ---
6
- st.set_page_config(page_title="Twitter Sentiment AI", page_icon="🐦")
7
-
8
- # --- MODEL LOADING (Cached) ---
9
  @st.cache_resource
10
- def load_twitter_model():
11
- # Using the specialized Twitter RoBERTa model
12
- model_path = "cardiffnlp/twitter-roberta-base-sentiment-latest"
13
- return pipeline("sentiment-analysis", model=model_path, tokenizer=model_path)
14
-
15
- classifier = load_twitter_model()
16
 
17
- def get_pdf_text(pdf_file):
18
- reader = PdfReader(pdf_file)
19
- text = ""
20
- for page in reader.pages:
21
- text += page.extract_text()
22
- return text
23
 
24
- # --- UI ELEMENTS ---
25
  st.title("🐦 Twitter-RoBERTa Sentiment AI")
26
- st.markdown("Analyze sentiment using a model trained on **124M+ tweets**.")
27
-
28
- # Tabbed interface for Input
29
- tab1, tab2 = st.tabs(["💬 Text Input", "📄 PDF Upload"])
30
 
31
- with tab1:
32
- user_input = st.text_area("Paste your text or tweet:", height=150)
33
 
34
- with tab2:
35
- uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
36
- if uploaded_file:
37
- with st.spinner("Extracting text from PDF..."):
38
- user_input = get_pdf_text(uploaded_file)
39
- st.info(f"Extracted {len(user_input)} characters from PDF.")
40
-
41
- if st.button("Analyze Sentiment"):
42
- if user_input.strip():
43
- with st.spinner("Analyzing with RoBERTa..."):
44
- # The Twitter model outputs labels like 'positive', 'neutral', 'negative'
45
- results = classifier(user_input[:2000]) # Truncate to avoid errors on huge PDFs
46
- label = results[0]['label']
47
- score = results[0]['score']
48
 
49
- st.divider()
 
 
 
 
50
 
51
- # Custom formatting based on labels
52
- if label.lower() == "positive":
53
- st.success(f"### Result: {label.upper()} 😊")
54
- elif label.lower() == "negative":
55
- st.error(f"### Result: {label.upper()} 😑")
56
- else:
57
- st.warning(f"### Result: {label.upper()} 😐")
58
-
59
- st.metric(label="Confidence Level", value=f"{score:.2%}")
60
- else:
61
- st.warning("Please provide some text or a PDF file.")
62
-
63
- st.caption("Note: Large PDFs are truncated to the first 2000 characters for processing speed.")
 
2
  from transformers import pipeline
3
  from pypdf import PdfReader
4
 
5
+ # --- MODEL LOADING ---
 
 
 
6
  @st.cache_resource
7
+ def load_model():
8
+ # This is the specialized Twitter model we discussed
9
+ return pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")
 
 
 
10
 
11
+ classifier = load_model()
 
 
 
 
 
12
 
13
+ # --- UI ---
14
  st.title("🐦 Twitter-RoBERTa Sentiment AI")
 
 
 
 
15
 
16
+ uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
 
17
 
18
+ if uploaded_file is not None:
19
+ try:
20
+ # Extract text from PDF
21
+ reader = PdfReader(uploaded_file)
22
+ text = ""
23
+ for page in reader.pages:
24
+ text += page.extract_text()
 
 
 
 
 
 
 
25
 
26
+ if st.button("Analyze PDF Sentiment"):
27
+ # Truncating text to stay within model limits (approx 512 tokens)
28
+ prediction = classifier(text[:1500])
29
+ label = prediction[0]['label']
30
+ score = prediction[0]['score']
31
 
32
+ st.write(f"**Result:** {label.upper()} (Confidence: {score:.2%})")
33
+ except Exception as e:
34
+ st.error(f"Error reading PDF: {e}")