Spaces:

AIEcosystem
/

AcademiaMiner

Sleeping

App Files Files Community

AIEcosystem committed on Sep 12, 2025

Commit

1c2d55a

verified ·

1 Parent(s): 1ec2a68

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +21 -64

src/streamlit_app.py CHANGED Viewed

@@ -1,7 +1,5 @@
 import os
 os.environ['HF_HOME'] = '/tmp'
 import time
 import streamlit as st
 import pandas as pd
@@ -15,14 +13,11 @@ from comet_ml import Experiment
 # --- App Configuration and Styling ---
 st.set_page_config(
     layout="wide",
-    page_title="English Keyphrase"
-)
 st.markdown(
     """
     <style>
-    .stApp {
         background-color: #f0f8ff; /* A single, solid color */
         color: #000000;
         font-family: 'Inter', sans-serif;
@@ -52,45 +47,26 @@ st.markdown(
     </style>
     """,
-    unsafe_allow_html=True
-)
 # --- Comet ML Setup ---
 COMET_API_KEY = os.environ.get("COMET_API_KEY")
 COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
 COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
 comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 if not comet_initialized:
     st.warning("Comet ML not initialized. Check environment variables.")
 # --- UI Header and Notes ---
 st.subheader("AcademiaMiner", divider="rainbow")
 st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 expander = st.expander("**Important notes**")
 expander.write('''**Entities:** This AcademiaMiner extracts keyphrases from English academic and scientific papers.
-Results are presented in easy-to-read tables, visualized in an interactive tree map and a bar chart, and are available for download along with a Glossary of tags.
-**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
-**Usage Limits:** You can request results unlimited times for one (1) month.
-**Supported Languages:** English
-**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
-For any errors or inquiries, please contact us at info@nlpblogs.com''')
 with st.sidebar:
     st.write("Use the following code to embed the AcademiaMiner web app on your website. Feel free to adjust the width and height values to fit your page.")
     code = '''
-    <iframe
-	src="https://aiecosystem-academiaminer.hf.space"
-	frameborder="0"
-	width="850"
-	height="450"
-    ></iframe>
     '''
     st.code(code, language="html")
     st.text("")
@@ -98,7 +74,6 @@ with st.sidebar:
     st.divider()
     st.subheader("🚀 Ready to build your own AI Web App?", divider="rainbow")
     st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
 # --- Model Loading ---
 @st.cache_resource
 def load_ner_model():
@@ -112,25 +87,28 @@ def load_ner_model():
     except Exception as e:
         st.error(f"Failed to load NER model: {e}")
         st.stop()
 model = load_ner_model()
 # --- Main App Logic ---
-text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
 def clear_text():
     """Reset the 'my_text_area' session entry to an empty string and mark the text as not processed."""
     state = st.session_state
     state.text_processed = False
     state['my_text_area'] = ""
 st.button("Clear text", on_click=clear_text)
 if st.button("Results"):
     if not text.strip():
         st.warning("Please enter some text to extract keyphrases.")
     else:
         start_time_overall = time.time()
         # Initialize Comet ML experiment at the start
         experiment = None
         if comet_initialized:
@@ -143,12 +121,10 @@ if st.button("Results"):
             except Exception as e:
                 st.warning(f"Could not initialize Comet ML experiment: {e}")
                 experiment = None
         try:
             with st.spinner("Analyzing text...", ):
                 # The pipeline model returns a list of dictionaries.
                 entities = model(text)
                 data = []
                 for entity in entities:
                     # 'ml6team/keyphrase-extraction-kbir-inspec' model doesn't have 'entity_group'
@@ -160,41 +136,30 @@ if st.button("Results"):
                         'start': entity['start'],
                         'end': entity['end']
                     })
                 if not data:
                     st.warning("No keyphrases found in the text.")
                     st.stop()
                 df = pd.DataFrame(data)
                 # --- Data Cleaning and Processing ---
                 pattern = r'[^\w\s]'
                 df['word'] = df['word'].replace(pattern, '', regex=True)
                 df = df.replace('', 'Unknown')
                 # --- All Extracted Keyphrases ---
                 st.subheader("All Extracted Keyphrases", divider="rainbow")
                 st.dataframe(df, use_container_width=True)
                 with st.expander("See Glossary of tags"):
                     st.write('''
                     **word**: ['keyphrase extracted from your text data']
                     **score**: ['accuracy score; how accurately a tag has been assigned']
                     **label**: ['label (tag) assigned to a given extracted keyphrase']
                     **start**: ['index of the start of the corresponding entity']
                     **end**: ['index of the end of the corresponding entity']
                     ''')
                 # --- Most Frequent Keyphrases ---
                 st.subheader("Most Frequent Keyphrases", divider="rainbow")
                 word_counts = df['word'].value_counts().reset_index()
                 word_counts.columns = ['word', 'count']
                 df_frequent = word_counts.sort_values(by='count', ascending=False).head(15)
                 if not df_frequent.empty:
                     tab1, tab2 = st.tabs(["Table", "Chart"])
                     with tab1:
@@ -214,13 +179,11 @@ if st.button("Results"):
                             paper_bgcolor='#f0f8ff', # Sets the background color of the entire figure
                             plot_bgcolor='#f0f8ff' # Sets the background color of the plotting area
                         )
                         st.plotly_chart(fig_frequent_bar, use_container_width=True)
                         if experiment:
                             experiment.log_figure(figure=fig_frequent_bar, figure_name="frequent_keyphrases_bar_chart")
                 else:
                     st.info("No keyphrases found with more than one occurrence.")
                 # --- Treemap of All Keyphrases ---
                 st.subheader("Treemap of All Keyphrases", divider="rainbow")
                 # Use 'label' instead of 'entity_group'
@@ -235,7 +198,6 @@ if st.button("Results"):
                 st.plotly_chart(fig_treemap, use_container_width=True)
                 if experiment:
                     experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
                 # --- Download Section ---
                 dfa = pd.DataFrame(
                     data={
@@ -254,7 +216,6 @@ if st.button("Results"):
                     myzip.writestr("Summary_of_results.csv", df.to_csv(index=False))
                     myzip.writestr("Most_frequent_keyphrases.csv", df_frequent.to_csv(index=False))
                     myzip.writestr("Glossary_of_tags.csv", dfa.to_csv(index=False))
                 with stylable_container(
                     key="download_button",
                     css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
@@ -266,7 +227,6 @@ if st.button("Results"):
                         mime="application/zip",
                     )
                 st.divider()
         except Exception as e:
             st.error(f"An unexpected error occurred during processing: {e}")
         finally:
@@ -279,10 +239,7 @@ if st.button("Results"):
                     experiment.end()
                 except Exception as comet_e:
                     st.warning(f"Comet ML experiment.end() failed: {comet_e}")
-            # Show elapsed time
-            end_time_overall = time.time()
-            elapsed_time_overall = end_time_overall - start_time_overall
-            st.info(f"Results processed in **{elapsed_time_overall:.2f} seconds**.")

 import os
 os.environ['HF_HOME'] = '/tmp'
 import time
 import streamlit as st
 import pandas as pd
 # --- App Configuration and Styling ---
 st.set_page_config(
     layout="wide",
+    page_title="English Keyphrase")
 st.markdown(
     """
     <style>
+       .stApp {
         background-color: #f0f8ff; /* A single, solid color */
         color: #000000;
         font-family: 'Inter', sans-serif;
     </style>
     """,
+    unsafe_allow_html=True)
 # --- Comet ML Setup ---
 COMET_API_KEY = os.environ.get("COMET_API_KEY")
 COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
 COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
 comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 if not comet_initialized:
     st.warning("Comet ML not initialized. Check environment variables.")
 # --- UI Header and Notes ---
 st.subheader("AcademiaMiner", divider="rainbow")
 st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 expander = st.expander("**Important notes**")
 expander.write('''**Entities:** This AcademiaMiner extracts keyphrases from English academic and scientific papers.
+    Results are presented in easy-to-read tables, visualized in an interactive tree map and a bar chart, and are available for download along with a Glossary of tags.
+    **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.**Usage Limits:** You can request results unlimited times for one (1) month.**Supported Languages:** English**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com''')
 with st.sidebar:
     st.write("Use the following code to embed the AcademiaMiner web app on your website. Feel free to adjust the width and height values to fit your page.")
     code = '''
+    <iframe	src="https://aiecosystem-academiaminer.hf.space"	frameborder="0"	width="850"	height="450"
+    ></iframe>
     '''
     st.code(code, language="html")
     st.text("")
     st.divider()
     st.subheader("🚀 Ready to build your own AI Web App?", divider="rainbow")
     st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
 # --- Model Loading ---
 @st.cache_resource
 def load_ner_model():
     except Exception as e:
         st.error(f"Failed to load NER model: {e}")
         st.stop()
 model = load_ner_model()
 # --- Main App Logic ---
+# Define the word limit
+word_limit = 200
+# Update text area with the word limit
+text = st.text_area(f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter", height=250, key='my_text_area')
+# Calculate and display the word count
+word_count = len(text.split())
+st.markdown(f"**Word count:** {word_count}/{word_limit}")
 def clear_text():
     """Reset the 'my_text_area' session entry to an empty string and mark the text as not processed."""
     state = st.session_state
     state.text_processed = False
     state['my_text_area'] = ""
 st.button("Clear text", on_click=clear_text)
 if st.button("Results"):
+    # Check for word limit and empty text first
     if not text.strip():
         st.warning("Please enter some text to extract keyphrases.")
+    elif word_count > word_limit:
+        st.warning(f"Your text exceeds the {word_limit} word limit. Please shorten it to continue.")
     else:
         start_time_overall = time.time()
         # Initialize Comet ML experiment at the start
         experiment = None
         if comet_initialized:
             except Exception as e:
                 st.warning(f"Could not initialize Comet ML experiment: {e}")
                 experiment = None
         try:
             with st.spinner("Analyzing text...", ):
                 # The pipeline model returns a list of dictionaries.
                 entities = model(text)
                 data = []
                 for entity in entities:
                     # 'ml6team/keyphrase-extraction-kbir-inspec' model doesn't have 'entity_group'
                         'start': entity['start'],
                         'end': entity['end']
                     })
                 if not data:
                     st.warning("No keyphrases found in the text.")
                     st.stop()
                 df = pd.DataFrame(data)
                 # --- Data Cleaning and Processing ---
                 pattern = r'[^\w\s]'
                 df['word'] = df['word'].replace(pattern, '', regex=True)
                 df = df.replace('', 'Unknown')
                 # --- All Extracted Keyphrases ---
                 st.subheader("All Extracted Keyphrases", divider="rainbow")
                 st.dataframe(df, use_container_width=True)
                 with st.expander("See Glossary of tags"):
                     st.write('''
                     **word**: ['keyphrase extracted from your text data']
                     **score**: ['accuracy score; how accurately a tag has been assigned']
                     **label**: ['label (tag) assigned to a given extracted keyphrase']
                     **start**: ['index of the start of the corresponding entity']
                     **end**: ['index of the end of the corresponding entity']
                     ''')
                 # --- Most Frequent Keyphrases ---
                 st.subheader("Most Frequent Keyphrases", divider="rainbow")
                 word_counts = df['word'].value_counts().reset_index()
                 word_counts.columns = ['word', 'count']
                 df_frequent = word_counts.sort_values(by='count', ascending=False).head(15)
                 if not df_frequent.empty:
                     tab1, tab2 = st.tabs(["Table", "Chart"])
                     with tab1:
                             paper_bgcolor='#f0f8ff', # Sets the background color of the entire figure
                             plot_bgcolor='#f0f8ff' # Sets the background color of the plotting area
                         )
                         st.plotly_chart(fig_frequent_bar, use_container_width=True)
                         if experiment:
                             experiment.log_figure(figure=fig_frequent_bar, figure_name="frequent_keyphrases_bar_chart")
                 else:
                     st.info("No keyphrases found with more than one occurrence.")
                 # --- Treemap of All Keyphrases ---
                 st.subheader("Treemap of All Keyphrases", divider="rainbow")
                 # Use 'label' instead of 'entity_group'
                 st.plotly_chart(fig_treemap, use_container_width=True)
                 if experiment:
                     experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
                 # --- Download Section ---
                 dfa = pd.DataFrame(
                     data={
                     myzip.writestr("Summary_of_results.csv", df.to_csv(index=False))
                     myzip.writestr("Most_frequent_keyphrases.csv", df_frequent.to_csv(index=False))
                     myzip.writestr("Glossary_of_tags.csv", dfa.to_csv(index=False))
                 with stylable_container(
                     key="download_button",
                     css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
                         mime="application/zip",
                     )
                 st.divider()
         except Exception as e:
             st.error(f"An unexpected error occurred during processing: {e}")
         finally:
                     experiment.end()
                 except Exception as comet_e:
                     st.warning(f"Comet ML experiment.end() failed: {comet_e}")
+        # Show elapsed time
+        end_time_overall = time.time()
+        elapsed_time_overall = end_time_overall - start_time_overall
+        st.info(f"Results processed in **{elapsed_time_overall:.2f} seconds**.")