Update app.py
app.py CHANGED
@@ -2,11 +2,11 @@ import streamlit as st
 import re
 
 # Use a pipeline as a high-level helper
-# Load model directly
-from transformers import AutoTokenizer, AutoModelForCausalLM
+# # Load model directly
+# from transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
-model = AutoModelForCausalLM.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
+# tokenizer = AutoTokenizer.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
+# model = AutoModelForCausalLM.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
 # import requests
 # import os
 # token=os.environ.get("HUGGING_FACE_TOKEN")
@@ -19,6 +19,11 @@ model = AutoModelForCausalLM.from_pretrained("shivanikerai/TinyLlama-1.1B-Chat-v
 # response = requests.post(API_URL, headers=headers, json=payload)
 # return response.json()
 
+# Use a pipeline as a high-level helper
+from transformers import pipeline
+
+pipe = pipeline("text-generation", model="shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0")
+
 def convert_to_dictionary(input_string):
     try:
         input_string = input_string.replace('</s>', '')
@@ -126,24 +131,26 @@ def ner_title(title):
     B_in, E_in = "[Title]", "[/Title]"
     # Format your prompt template
     prompt = f"""{B_INST} {B_SYS} You are a helpful assistant that provides accurate and concise responses. {E_SYS}\nExtract named entities from the given product title. Provide the output in JSON format.\n{B_in} {title.strip()} {E_in}\n{E_INST}\n\n### NER Response:\n{{"{title.split()[0].lower()}"""
+    output= pipe(prompt)
+
     # output = query({
     #     "inputs": prompt,
     # })
 
-    encoding = tokenizer(prompt, return_tensors="pt").to("cuda:0")
-    output = model.generate(input_ids=encoding.input_ids,
-                            attention_mask=encoding.attention_mask,
-                            max_new_tokens=512,
-                            do_sample=True,
-                            temperature=0.01,
-                            eos_token_id=tokenizer.eos_token_id,
-                            top_k=0)
+    # encoding = tokenizer(prompt, return_tensors="pt").to("cuda:0")
+    # output = model.generate(input_ids=encoding.input_ids,
+    #                         attention_mask=encoding.attention_mask,
+    #                         max_new_tokens=512,
+    #                         do_sample=True,
+    #                         temperature=0.01,
+    #                         eos_token_id=tokenizer.eos_token_id,
+    #                         top_k=0)
 
 
 
     # Subtract the length of input_ids from output to get only the model's response
-    output_text = tokenizer.decode(output[0, len(encoding.input_ids[0]):], skip_special_tokens=False)
-    output = re.sub('\n+', '\n', output_text) # remove excessive newline characters
+    # output_text = tokenizer.decode(output[0, len(encoding.input_ids[0]):], skip_special_tokens=False)
+    # output = re.sub('\n+', '\n', output_text) # remove excessive newline characters
     #output = f"""{{\"{title.split()[0].lower()} {output_text}"""
     #output = re.sub(' ": "', '": "', output)
 
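For context, the commit replaces the explicit AutoTokenizer / AutoModelForCausalLM loading and the model.generate call pinned to "cuda:0" with a transformers text-generation pipeline. The sketch below is not part of the commit; it only shows how the new pipe(prompt) call can stand in for the removed generate-and-decode path. The sampling values are copied from the old generate call, while return_full_text=False and the generate_response wrapper are assumptions for illustration: by default the pipeline returns the prompt plus the completion, whereas the old code decoded only the newly generated tokens.

# Sketch only, assuming the same prompt string built in ner_title().
import re
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="shivanikerai/TinyLlama-1.1B-Chat-v1.0-sku-title-ner-generation-reversed-v1.0",
)

def generate_response(prompt: str) -> str:
    result = pipe(
        prompt,
        max_new_tokens=512,      # same budget as the removed generate call
        do_sample=True,
        temperature=0.01,
        return_full_text=False,  # keep only the completion, like the old slice over the output ids
    )
    # A text-generation pipeline returns a list of dicts keyed by "generated_text".
    text = result[0]["generated_text"]
    return re.sub('\n+', '\n', text)  # collapse repeated newlines, as the old code did

Unlike the removed code, nothing here is pinned to cuda:0, so it also runs on a CPU-only Space, just more slowly.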