Update app.py
Browse files
app.py
CHANGED
|
@@ -110,58 +110,35 @@ if userinput and api_key and st.button("Extract Claims", key="claims_extraction"
|
|
| 110 |
|
| 111 |
# Display generated objectives for all chunks
|
| 112 |
learning_status_placeholder.text(f"Patentable Claims Extracted!\n{all_extracted_claims.strip()}")
|
| 113 |
-
with st.expander("See transcript"):
|
| 114 |
-
st.markdown(transcript)
|
| 115 |
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
"Select the model you want to use:",
|
| 119 |
-
["gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-3.5-turbo", "gpt-4-0314", "gpt-4-0613", "gpt-4"]
|
| 120 |
-
)
|
| 121 |
|
| 122 |
-
#
|
| 123 |
-
|
| 124 |
-
# userinput = st.text_input("Input Text:", "Freeform text here!") # Commented out, as it's updated above
|
| 125 |
|
| 126 |
-
# Initialize
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
-
#
|
| 131 |
-
st.
|
| 132 |
-
# Initialize autogenerated objectives
|
| 133 |
-
claims_extraction = ""
|
| 134 |
-
# Initialize status placeholder
|
| 135 |
-
learning_status_placeholder = st.empty()
|
| 136 |
-
disable_button_bool = False
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
input_chunks = chunk_text(userinput)
|
| 141 |
|
| 142 |
-
|
| 143 |
-
|
| 144 |
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
claims_extraction_response = openai.ChatCompletion.create(
|
| 151 |
-
model=model_choice,
|
| 152 |
-
messages=[
|
| 153 |
-
{"role": "user", "content": f"Extract any patentable claims from the following: \n {chunk}. \n Extract each claim. Briefly explain why you extracted this word phrase. Exclude any additional commentary."}
|
| 154 |
-
]
|
| 155 |
-
)
|
| 156 |
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
# Append the extracted claims from the current chunk to the overall results
|
| 161 |
-
all_extracted_claims += claims_extraction.strip()
|
| 162 |
-
|
| 163 |
-
# Save the generated objectives to session state
|
| 164 |
-
st.session_state.claims_extraction = all_extracted_claims
|
| 165 |
-
|
| 166 |
-
# Display generated objectives for all chunks
|
| 167 |
-
learning_status_placeholder.text(f"Patentable Claims Extracted!\n{all_extracted_claims.strip()}")
|
|
|
|
| 110 |
|
| 111 |
# Display generated objectives for all chunks
|
| 112 |
learning_status_placeholder.text(f"Patentable Claims Extracted!\n{all_extracted_claims.strip()}")
|
|
|
|
|
|
|
| 113 |
|
| 114 |
# --- Extractive summarization of the extracted patent claims with legal-BERT ---
from transformers import AutoConfig, AutoTokenizer, AutoModel
from summarizer import Summarizer

# Legal-domain BERT checkpoint used for extractive summarization.
model_name = 'nlpaueb/legal-bert-base-uncased'

# Initialize the BERT-based model and tokenizer.
# output_hidden_states=True is required so Summarizer can pool hidden-state
# embeddings for sentence scoring.
custom_config = AutoConfig.from_pretrained(model_name)
custom_config.output_hidden_states = True
custom_tokenizer = AutoTokenizer.from_pretrained(model_name)
custom_model = AutoModel.from_pretrained(model_name, config=custom_config)
bert_legal_model = Summarizer(custom_model=custom_model, custom_tokenizer=custom_tokenizer)
print('Using model {}\n'.format(model_name))

# Get the extracted claims from Streamlit's session state.
# .get() with a default avoids a KeyError/AttributeError when this section
# runs before the claims-extraction step has populated session state.
claims_extracted = st.session_state.get("claims_extraction", "")

# Split the extracted claims into fixed-size character chunks so each
# summarizer call stays within the model's input limits.
# NOTE(review): 350 characters (not tokens) — presumably chosen to stay well
# under the 512-token BERT limit; confirm against typical claim text.
chunk_size = 350
chunks = [claims_extracted[i:i + chunk_size] for i in range(0, len(claims_extracted), chunk_size)]

# Summarize each chunk with the BERT-based model; a comprehension replaces
# the manual append loop. min_length/ratio keep each summary short.
# With empty input, `chunks` is empty and this is a harmless no-op.
summaries = [bert_legal_model(chunk, min_length=8, ratio=0.05) for chunk in chunks]

# `summaries` now holds one summary string per chunk:
# access them as summaries[0], summaries[1], etc.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|