wizzseen committed on
Commit
2ac37c9
·
1 Parent(s): 549d54a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import subprocess
import sys

import gradio as gr


def _pip_install(*args):
    """Run `python -m pip install <args>`, raising CalledProcessError on failure.

    Uses `sys.executable -m pip` so packages land in the interpreter actually
    running this script. The original invoked a bare "pip " (note the trailing
    space), which cannot resolve to any executable, so that install silently
    did nothing.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", *args])


# Runtime dependency installation (Hugging Face Spaces / Colab workaround).
_pip_install("-q", "sentence-transformers")
_pip_install("langchain")
# Install pypdf
_pip_install("-q", "pypdf")
# Install python-dotenv
_pip_install("-q", "python-dotenv")
# Install transformers
_pip_install("-q", "transformers")

# Install llama-cpp-python built with cuBLAS. `--install-option` was removed
# from modern pip; the supported way to pass build flags to llama-cpp-python
# is the CMAKE_ARGS / FORCE_CMAKE environment variables.
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "llama-cpp-python", "--no-cache-dir"],
    env={**os.environ, "CMAKE_ARGS": "-DLLAMA_CUBLAS=on", "FORCE_CMAKE": "1"},
)

# Install llama-index
_pip_install("-q", "llama-index")
import logging
import sys

# Route all INFO-and-above log output to stdout so it shows up in the
# hosting console. basicConfig() already attaches a stdout StreamHandler to
# the root logger; the original additionally called
# root.addHandler(StreamHandler(stdout)), which made every message print
# twice. The redundant handler is dropped here.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext

# Load every document found under /content/Data/ for later indexing.
# NOTE(review): this path is Colab-specific — confirm it exists in the
# deployment environment, otherwise SimpleDirectoryReader raises at startup.
documents = SimpleDirectoryReader("/content/Data/").load_data()
import torch

from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt

# Quantized Mistral-7B-Instruct GGUF weights, fetched straight from the Hub.
_MODEL_URL = (
    "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/"
    "resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf"
)

# Configuration for the local llama.cpp-backed LLM, gathered in one place.
_LLM_SETTINGS = {
    # Pass a URL so the GGUF file is downloaded automatically; set
    # model_path instead to reuse a pre-downloaded copy.
    "model_url": _MODEL_URL,
    "model_path": None,
    "temperature": 0.1,
    "max_new_tokens": 256,
    # The model supports a 4096-token context window; 3900 leaves wiggle room.
    "context_window": 3900,
    # Extra kwargs forwarded to __call__() at generation time.
    "generate_kwargs": {},
    # Forwarded to llama.cpp's constructor; -1 offloads all layers to GPU.
    "model_kwargs": {"n_gpu_layers": -1},
    # Convert chat messages / completions into the Llama-2 prompt format.
    "messages_to_prompt": messages_to_prompt,
    "completion_to_prompt": completion_to_prompt,
    "verbose": True,
}

llm = LlamaCPP(**_LLM_SETTINGS)
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings import LangchainEmbedding
from llama_index import ServiceContext

# Wrap the GTE-large sentence embedder so llama-index can consume it via
# its LangChain adapter.
_hf_embedder = HuggingFaceEmbeddings(model_name="thenlper/gte-large")
embed_model = LangchainEmbedding(_hf_embedder)

# Bundle the LLM and embedder into one context; 256-token chunks keep
# retrieval granular.
service_context = ServiceContext.from_defaults(
    chunk_size=256,
    llm=llm,
    embed_model=embed_model,
)

# Embed all loaded documents into an in-memory vector index and expose a
# query engine over it for the UI handler below.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
def query_handler(query):
    """Run *query* through the RAG query engine and return the answer text.

    Gradio's Textbox output component expects a string; the engine returns a
    llama-index Response object, so coerce it explicitly instead of relying
    on gradio's implicit stringification of an arbitrary object.
    """
    response = query_engine.query(query)
    return str(response)
# Create an interface with a text input for the user query.
iface = gr.Interface(
    fn=query_handler,
    # `placeholder` is the Textbox hint-text argument; the original passed
    # `prompt=`, which is not a gr.Textbox parameter and raises TypeError.
    inputs=gr.Textbox(placeholder="Enter your question here..."),
    outputs=gr.Textbox(),
    # NOTE(review): live=True re-runs the handler as the user types, which is
    # very expensive with a local LLM — consider submit-only instead.
    live=True,
    # `capture_session` and `interpretation` were removed from gr.Interface
    # in modern Gradio releases and would crash at construction; dropped.
)

# Launch the interface
iface.launch()