import logging
import sys

# Log llama-index activity (model download, indexing, query steps) to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext

# Read every file in the local "Data" directory into Document objects.
documents = SimpleDirectoryReader("Data").load_data()

from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt

llm = LlamaCPP(
    # Download the 4-bit quantized Mistral-7B-Instruct GGUF weights from
    # Hugging Face; set model_path instead if the file is already on disk.
    model_url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # Stay below Mistral's 4096-token limit to leave room for the prompt wrapper.
    context_window=3900,
    generate_kwargs={},
    # Offload all layers to the GPU; use 0 for CPU-only inference.
    model_kwargs={"n_gpu_layers": -1},
    # Adapters that format chat messages and completions into Llama-style prompts.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
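# Note: LlamaCPP depends on the llama-cpp-python package; it must be installed
# with GPU support (e.g. a CUDA-enabled build) for n_gpu_layers=-1 to take effect.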

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings import LangchainEmbedding

# Wrap the LangChain embedding model so llama-index can call it.
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="thenlper/gte-large")
)
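# HuggingFaceEmbeddings requires the sentence-transformers package and downloads
# the gte-large weights on first use.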

# Bundle the LLM, embedding model, and chunking configuration.
service_context = ServiceContext.from_defaults(
    chunk_size=256,
    llm=llm,
    embed_model=embed_model,
)

# Chunk, embed, and index the documents.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
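# Optional: persist the built index to disk so documents are not re-embedded on
# every run (the persist_dir path below is illustrative, not from the original).
# index.storage_context.persist(persist_dir="./storage")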

# Build a query engine that retrieves relevant chunks and asks the LLM.
query_engine = index.as_query_engine()
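# Quick sanity check before wiring up the UI (the question is illustrative):
# print(query_engine.query("What are these documents about?"))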

import gradio as gr

def answer_query(text):
    # Run the RAG query; str() extracts the answer text from the Response object.
    response = query_engine.query(text)
    return str(response)

iface = gr.Interface(fn=answer_query, inputs="text", outputs="text")
iface.launch()
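# launch() serves the app locally (default http://127.0.0.1:7860); pass
# share=True to expose a temporary public URL.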