import logging
import sys
# Send INFO-level logs to stdout so retrieval and LLM calls are visible.
# basicConfig already attaches a stdout handler; adding a second
# StreamHandler would print every message twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
documents = SimpleDirectoryReader("Data").load_data()
import torch
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
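# Run a quantized Mistral 7B Instruct model locally through llama.cpp.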
llm = LlamaCPP(
    # You can pass in the URL to a GGUF model to download it automatically
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    # optionally, set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # set comfortably below the model's context window to leave room for the prompt
    context_window=3900,
    # kwargs passed to the model's __call__() at generation time
    generate_kwargs={},
    # kwargs passed to the model's __init__();
    # n_gpu_layers=-1 offloads all layers to the GPU (use 0 for CPU-only)
    model_kwargs={"n_gpu_layers": -1},
    # format messages/completions into the Llama-2 [INST] prompt style,
    # which Mistral Instruct also follows
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
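# Embed text with a HuggingFace sentence-embedding model (gte-large),
# wrapped so LlamaIndex can call it through its LangChain adapter.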
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings import LangchainEmbedding
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="thenlper/gte-large")
)
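# Bundle the LLM, embedding model, and chunking settings so the index
# and query engine use them everywhere.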
service_context = ServiceContext.from_defaults(
    chunk_size=256,
    llm=llm,
    embed_model=embed_model,
)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
# Example: response = query_engine.query("What is Fibromyalgia?")
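# Wrap the query engine in a minimal Gradio text-in/text-out UI.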
import gradio as gr
def answer_query(text):
    # Run the question through the RAG pipeline and return plain text
    # (Gradio's "text" output expects a string, not a Response object).
    response = query_engine.query(text)
    return str(response)

iface = gr.Interface(fn=answer_query, inputs="text", outputs="text")
iface.launch()
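# iface.launch(share=True) would also expose a temporary public URL.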