File size: 1,998 Bytes
6f40514
 
12037f9
2ac37c9
 
 
432e638
 
 
b5f9c61
2ac37c9
432e638
 
 
 
2ac37c9
432e638
2ac37c9
432e638
2ac37c9
 
 
432e638
2ac37c9
432e638
2ac37c9
432e638
 
2ac37c9
432e638
2ac37c9
 
 
 
 
432e638
 
 
 
 
 
 
2ac37c9
432e638
2ac37c9
 
432e638
2ac37c9
 
 
 
 
 
 
432e638
2ac37c9
432e638
2ac37c9
432e638
 
 
9c73ed3
 
2ac37c9
432e638
e7f1b3d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import logging
import sys

# Send library (llama_index) log output to stdout. basicConfig() already
# attaches a StreamHandler to the root logger; the original code added a
# second StreamHandler on top of it, which made every log record print twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext

# Load all documents found in the local ./Data directory for indexing.
documents = SimpleDirectoryReader("Data").load_data()

import torch

from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt

# Configuration for the local Mistral-7B-Instruct model served through
# llama.cpp. Collected in one place so the settings read as a unit.
_LLM_CONFIG = dict(
    # Quantized GGUF model; downloaded automatically because model_path
    # is None. Point model_path at a local file to skip the download.
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # Kept below the model's 4096-token context window for wiggle room.
    context_window=3900,
    # Extra kwargs forwarded to the model's __call__().
    generate_kwargs={},
    # -1 offloads all layers to the GPU (llama-cpp-python convention);
    # set to 0 for CPU-only inference.
    model_kwargs={"n_gpu_layers": -1},
    # Helpers that render chat messages / completions into the prompt
    # format the instruct model expects.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

llm = LlamaCPP(**_LLM_CONFIG)




from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings import LangchainEmbedding
from llama_index import ServiceContext

# Wrap a LangChain HuggingFace embedding model so llama_index can use it
# as its embedding backend.
hf_embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-large")
embed_model = LangchainEmbedding(hf_embeddings)

# Bundle the LLM and the embedding model; documents are split into
# 256-token chunks before being embedded.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    chunk_size=256,
)

# Build the in-memory vector index over the loaded documents.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

# Expose the index through a query engine and serve it via a minimal
# Gradio text-in / text-out interface.
query_engine = index.as_query_engine()

import gradio as gr


def answer_query(question):
    """Answer *question* using the RAG query engine; return the answer text.

    Gradio's "text" output component expects a plain string, so the
    Response object returned by query_engine.query() is converted
    explicitly instead of being returned as-is.
    """
    response = query_engine.query(question)
    return str(response)


# Backward-compatible alias: the original name was a leftover from a
# Gradio template and did not describe what the function does.
text_to_uppercase = answer_query

iface = gr.Interface(fn=answer_query, inputs="text", outputs="text")
iface.launch()