# Financial News Analyzer — Streamlit app (Hugging Face Space; scraped status banner removed)
import re
from functools import cache

import pdfplumber
import streamlit as st
from transformers import pipeline, AutoTokenizer
# Page chrome: browser-tab title, icon, and full-width layout for the dashboard.
st.set_page_config(page_title="Financial News Analyzer",
                   page_icon="π°",
                   layout="wide")
# ───────────────── Cached pipelines ──────────────────────────────────────
@cache  # memoize: without this, every Streamlit rerun reloaded all three models
def load_pipes():
    """Build (once per process) the three HF pipelines used by the app.

    Returns:
        tuple: (summarizer pipeline, summarizer tokenizer,
                sentiment classification pipeline, NER pipeline)
    """
    summarizer = pipeline("summarization", model=SUMM_MODEL)
    tokenizer = AutoTokenizer.from_pretrained(SUMM_MODEL)
    sentiment = pipeline("text-classification", model=SENT_MODEL)
    # aggregation_strategy="simple" merges sub-word pieces into whole entities.
    ner = pipeline("token-classification", model=NER_MODEL,
                   aggregation_strategy="simple")
    return summarizer, tokenizer, sentiment, ner
# ───────────────── Helper functions ──────────────────────────────────────
# Read text from PDF
def extract_pdf(file):
    """Return the concatenated extracted text of every page in *file*.

    Pages where pdfplumber yields no text contribute an empty string.
    """
    with pdfplumber.open(file) as pdf:
        page_texts = (page.extract_text() or "" for page in pdf.pages)
        return "".join(page_texts)
| # Split text to avoid long contents | |
| def split_by_tokens(text, max_tokens): | |
| words = re.split(r"(\s+)", text) | |
| buf, n = "", 0 | |
| for w in words: | |
| ln = len(TOK(w).input_ids) | |
| if n + ln <= max_tokens: | |
| buf, n = buf + w, n + ln | |
| else: | |
| yield buf.strip() | |
| buf, n = w, ln | |
| if buf.strip(): | |
| yield buf.strip() | |
# Summarise the news
def summarise(text):
    """Two-pass summarization of *text*.

    Pass 1 summarizes each token-bounded chunk individually; pass 2 compresses
    the stitched result again only if it still exceeds TARGET_WORDS words.
    """
    chunks = list(split_by_tokens(text, MAX_TOK))
    # Spread the word budget evenly across chunks, clamped to a 25-80 range.
    budget = TARGET_WORDS // max(1, len(chunks))
    per_len = max(25, min(80, budget))
    pieces = []
    for chunk in chunks:
        out = SUMMAR(chunk, max_length=per_len, min_length=per_len // 2, do_sample=False)
        pieces.append(out[0]["summary_text"])
    combined = " ".join(pieces)
    # Second pass: only re-summarize when the stitched summary is too long.
    if len(combined.split()) > TARGET_WORDS:
        combined = SUMMAR(combined, max_length=TARGET_WORDS,
                          min_length=TARGET_WORDS // 2,
                          do_sample=False)[0]["summary_text"]
    return combined
# Shorten the summary to 1-5 sentences (user-selected)
def shorten(summary, n_sentences):
    """Trim *summary* to its first *n_sentences* sentences.

    Sentences are delimited naively by ". "; the clipped text is re-terminated
    with a single period. A summary that is already short enough is returned
    unchanged.
    """
    sentences = summary.split(". ")
    if len(sentences) <= n_sentences:
        return summary
    clipped = ". ".join(sentences[:n_sentences])
    return clipped.rstrip(".") + "."
# Key entity tagging
def tag_entities(text):
    """Run NER over *text* and bucket the hits into readable categories.

    Returns a dict of category name -> sorted, de-duplicated entity words;
    empty categories are dropped.
    """
    label_names = {"ORG": "Organization", "PER": "Person", "LOC": "Location"}
    buckets = {"Organization": [], "Person": [], "Location": [], "Miscellaneous": []}
    for hit in NER(text):
        # Anything outside ORG/PER/LOC falls into Miscellaneous.
        bucket = label_names.get(hit["entity_group"], "Miscellaneous")
        buckets[bucket].append(hit["word"])
    return {name: sorted(set(words)) for name, words in buckets.items() if words}
# ───────────────── Main App Logic ────────────────────────────────────────
def main():
    """Render the Streamlit UI: input sidebar, then summary / sentiment / tags."""
    st.title("π° Financial News Analyzer")
    st.markdown("##### Instantly grasp news content, sentiment, and relevant entities")
    # Sidebar Input
    with st.sidebar:
        st.header("Input News to Analyze:")
        # Step 1: enter the news (pasted text or an uploaded PDF)
        txt_input = st.text_area("Paste news article", height=150)
        pdf_file = st.file_uploader("Or upload PDF", type=["pdf"])
        # let user choose summary length (1-5 sentences)
        sent_count = st.slider("Summary length (sentences)", min_value=1, max_value=5, value=3)
        run_btn = st.button("π Analyze", use_container_width=True)
    # Main processing
    if run_btn:
        # Parse input only after Analyze is clicked; previously the PDF was
        # re-extracted on every Streamlit rerun even when idle.
        raw_text = extract_pdf(pdf_file) if pdf_file else txt_input.strip()
        if not raw_text:
            st.warning("Please provide text or a PDF first.")
            st.stop()
        with st.spinner("Analyzing..."):
            full_sum = summarise(raw_text)
            summary = shorten(full_sum, sent_count)
        # Step 2: Summarization
        cols = st.columns([2, 1])
        with cols[0]:
            st.subheader("π Summary")
            st.markdown(f"<div style='white-space: pre-wrap'>{summary}</div>", unsafe_allow_html=True)
        # Step 3: Sentiment analysis
        with cols[1]:
            result = SENT_CLF(summary)[0]
            label = LABEL_MAP.get(result["label"], result["label"])
            color = COLOR_MAP[label]
            st.subheader("π Sentiment")
            # Fix: confidence line previously emitted a stray </p> with no
            # opening tag, producing malformed HTML.
            st.markdown(
                f"<h3 style='color:{color};margin-bottom:0'>{label}</h3>"
                f"<p>{result['score'] * 100:.1f}% Confidence</p>",
                unsafe_allow_html=True
            )
        # Step 4: Entity Tags
        tags = tag_entities(summary)
        st.subheader("π·οΈ Relevant Tags")
        if tags:
            # Tag pill CSS
            pill_css = """
            <style>
            .tag-pill {
                display: inline-block;
                background: #f0f2f6;
                color: #333;
                padding: 4px 10px;
                margin: 2px 4px 2px 0;
                border-radius: 12px;
                font-size: 0.9em;
            }
            .tag-cat {
                font-weight: 600;
                margin-top: 0;
                margin-bottom: 4px;
            }
            </style>
            """
            st.markdown(pill_css, unsafe_allow_html=True)
            for category, values in tags.items():
                st.markdown(f"<div class='tag-cat'>{category}</div>", unsafe_allow_html=True)
                pills = "".join(f"<span class='tag-pill'>{v}</span>" for v in values)
                st.markdown(pills, unsafe_allow_html=True)
        else:
            st.info("No entities detected.")
# ───────────────── Main Part ─────────────────────────────────────────────
# models and other constant variables (module-level so the helpers above can
# reference them; load_pipes() runs once at import time)
# Summarization checkpoint (distilled BART fine-tuned on CNN/DailyMail).
SUMM_MODEL = "sshleifer/distilbart-cnn-12-6"
# Sentiment classification checkpoint.
SENT_MODEL = "nynn/Fintuned_Sentiment"
# Multilingual NER checkpoint used for entity tagging.
NER_MODEL = "Babelscape/wikineural-multilingual-ner"
SUMMAR, TOK, SENT_CLF, NER = load_pipes()
# Token budget per summarizer input chunk.
MAX_TOK = 1024
# Target word count for the stitched first-pass summary.
TARGET_WORDS = 225
# NOTE(review): label-index mapping assumed from the model's config
# (LABEL_0=Negative, LABEL_1=Positive, LABEL_2=Neutral) — confirm against
# the checkpoint's id2label.
LABEL_MAP = {"LABEL_0": "Negative", "LABEL_1": "Positive", "LABEL_2": "Neutral"}
COLOR_MAP = {"Positive": "green", "Negative": "red", "Neutral": "gray"}
if __name__ == "__main__":
    main()