Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,6 +9,7 @@ from langchain_community.vectorstores import FAISS
|
|
| 9 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 10 |
from langchain_community.llms import HuggingFacePipeline
|
| 11 |
from transformers import BitsAndBytesConfig
|
|
|
|
| 12 |
|
| 13 |
from langchain.prompts import PromptTemplate
|
| 14 |
from langchain.schema.runnable import RunnablePassthrough
|
|
@@ -24,19 +25,11 @@ import transformers
|
|
| 24 |
model_name='mistralai/Mistral-7B-Instruct-v0.1'
|
| 25 |
from huggingface_hub import login
|
| 26 |
login(token=st.secrets["HF_TOKEN"])
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
|
| 29 |
-
model = AutoModelForCausalLM.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
|
| 30 |
-
model_file="mistral-7b-instruct-v0.1.Q5_K_M.gguf",
|
| 31 |
-
model_type="mistral",
|
| 32 |
-
max_new_tokens=1048,
|
| 33 |
-
temperature=0.01,
|
| 34 |
-
hf=True
|
| 35 |
-
)
|
| 36 |
-
|
| 37 |
-
#initializes a tokenizer for the specified LLM model.
|
| 38 |
-
tokenizer = AutoTokenizer.from_pretrained(model)
|
| 39 |
-
dataset= load_dataset("mery22/testub/test-1.pdf")
|
| 40 |
loader = PyPDFLoader(dataset)
|
| 41 |
data = loader.load()
|
| 42 |
text_splitter1 = CharacterTextSplitter(chunk_size=512, chunk_overlap=0,separator="\n\n")
|
|
|
|
| 9 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 10 |
from langchain_community.llms import HuggingFacePipeline
|
| 11 |
from transformers import BitsAndBytesConfig
|
| 12 |
+
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
| 13 |
|
| 14 |
from langchain.prompts import PromptTemplate
|
| 15 |
from langchain.schema.runnable import RunnablePassthrough
|
|
|
|
| 25 |
model_name='mistralai/Mistral-7B-Instruct-v0.1'
|
| 26 |
from huggingface_hub import login
|
| 27 |
login(token=st.secrets["HF_TOKEN"])
|
| 28 |
+
llm = HuggingFaceEndpoint(
|
| 29 |
+
repo_id=repo_id, max_length=128, temperature=0.5, token=st.secrets["HF_TOKEN"]
|
| 30 |
+
)
|
| 31 |
|
| 32 |
+
dataset= load_dataset("test-1.pdf")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
loader = PyPDFLoader(dataset)
|
| 34 |
data = loader.load()
|
| 35 |
text_splitter1 = CharacterTextSplitter(chunk_size=512, chunk_overlap=0,separator="\n\n")
|