Spaces:

spark-nlp
/

TAPAS

Sleeping

App Files Community

abdullahmubeen10 committed on Aug 10, 2024

Commit

4f033e8

verified ·

1 Parent(s): 7187099

Update Demo.py

Browse files

Files changed (1) hide show

Demo.py +28 -22

Demo.py CHANGED Viewed

@@ -2,15 +2,12 @@ import streamlit as st
 import sparknlp
 import pandas as pd
 import json
-import os
 from sparknlp.base import *
 from sparknlp.annotator import *
 from pyspark.ml import Pipeline
 from sparknlp.pretrained import PretrainedPipeline
-os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3.8'
 # Page configuration
 st.set_page_config(
     layout="wide",
@@ -40,13 +37,7 @@ st.markdown("""
 @st.cache_resource
 def init_spark():
-    from pyspark.sql import SparkSession
-    spark = SparkSession.builder \
-        .appName("App") \
-        .config("spark.pyspark.python", "/usr/bin/python3.8") \
-        .config("spark.pyspark.driver.python", "/usr/bin/python3.8") \
-        .getOrCreate()
-    return spark
 @st.cache_resource
 def create_pipeline(model):
@@ -75,11 +66,20 @@ def create_pipeline(model):
     pipeline = Pipeline(stages=[document_assembler, sentence_detector, table_assembler, tapas_wtq, tapas_sqa])
     return pipeline
 def fit_data(pipeline, json_data, question):
     spark_df = spark.createDataFrame([[json_data, question]]).toDF("table_json", "questions")
     model = pipeline.fit(spark_df)
-    result = model.transform(spark_df)
-    return result.select("answers_wtq.result", "answers_sqa.result").collect()
 # Sidebar content
 model = st.sidebar.selectbox(
@@ -90,19 +90,19 @@ model = st.sidebar.selectbox(
 # Set up the page layout
 title = 'TAPAS for Table-Based Question Answering with Spark NLP'
-sub_title = (
-    'TAPAS (Table Parsing Supervised via Pre-trained Language Models) is a model that extends '
-    'the BERT architecture to handle tabular data. Unlike traditional models that require flattening '
-    'tables into text, TAPAS can directly interpret tables, making it a powerful tool for answering '
-    'questions that involve tabular data.'
-)
 st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
 st.markdown(f'<div class="section"><p>{sub_title}</p></div>', unsafe_allow_html=True)
 # Reference notebook link in sidebar
 link = """
-<a href="https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Public/15.1_Table_Question_Answering.ipynb">
     <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
 </a>
 """
@@ -149,7 +149,6 @@ queries = [
     "How many billionaires are in the conglomerate industry?"
 ]
 # Load the JSON data into a DataFrame and display it
 table_data = json.loads(json_data)
 df_table = pd.DataFrame(table_data["rows"], columns=table_data["header"])
@@ -180,5 +179,12 @@ output = fit_data(pipeline, table_json_str, text_to_analyze)
 # Display the output
 st.markdown("---")
-st.subheader("Processed output:")
-st.write("**Answer:**", ', '.join(output[0][0]))

 import sparknlp
 import pandas as pd
 import json
 from sparknlp.base import *
 from sparknlp.annotator import *
 from pyspark.ml import Pipeline
 from sparknlp.pretrained import PretrainedPipeline
 # Page configuration
 st.set_page_config(
     layout="wide",
 @st.cache_resource
 def init_spark():
+    return sparknlp.start()
 @st.cache_resource
 def create_pipeline(model):
     pipeline = Pipeline(stages=[document_assembler, sentence_detector, table_assembler, tapas_wtq, tapas_sqa])
     return pipeline
+def fit_data(pipeline, data):
+  empty_df = spark.createDataFrame([['']]).toDF('text')
+  pipeline_model = pipeline.fit(empty_df)
+  model = LightPipeline(pipeline_model)
+  result = model.fullAnnotate(data)
+  return result
 def fit_data(pipeline, json_data, question):
     spark_df = spark.createDataFrame([[json_data, question]]).toDF("table_json", "questions")
     model = pipeline.fit(spark_df)
+    lightPipelineModel = LightPipeline(model)
+    result = lightPipelineModel.fullAnnotate(data)
+    st.write(result)
+    return result
 # Sidebar content
 model = st.sidebar.selectbox(
 # Set up the page layout
 title = 'TAPAS for Table-Based Question Answering with Spark NLP'
+sub_title = ("""
+TAPAS (Table Parsing Supervised via Pre-trained Language Models) enhances the BERT architecture to effectively process tabular data, allowing it to answer complex questions about tables without needing to convert them into text.<br>
+<br>
+<strong>table_qa_tapas_base_finetuned_wtq:</strong> This model excels at answering questions that require aggregating data across the entire table, such as calculating sums or averages.<br>
+<strong>table_qa_tapas_base_finetuned_sqa:</strong> This model is designed for sequential question-answering tasks where the answer to each question may depend on the context provided by previous answers.
+""")
 st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
 st.markdown(f'<div class="section"><p>{sub_title}</p></div>', unsafe_allow_html=True)
 # Reference notebook link in sidebar
 link = """
+<a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/NER_HINDI_ENGLISH.ipynb">
     <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
 </a>
 """
     "How many billionaires are in the conglomerate industry?"
 ]
 # Load the JSON data into a DataFrame and display it
 table_data = json.loads(json_data)
 df_table = pd.DataFrame(table_data["rows"], columns=table_data["header"])
 # Display the output
 st.markdown("---")
+st.subheader("Processed Output")
+# # Check if output is available
+# if output:
+#     results_wtq = output[0][0] if output[0][0] else "No results found."
+#     results_sqa = output[0][1] if output[0][1] else "No results found."
+#     st.markdown(f"**Answers from WTQ model:** {', '.join(results_wtq)}")
+#     st.markdown(f"**Answers from SQA model:** {', '.join(results_sqa)}")