File size: 2,563 Bytes
b764097
 
18785ea
ee2ecae
ff66372
2ca25d6
ff66372
2ca25d6
ee2ecae
 
 
78be535
281002f
e83b5d0
 
 
41c74da
281002f
41c74da
 
2088fae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1edf848
b764097
 
 
 
 
 
 
1edf848
 
b764097
 
672ab3e
b764097
 
 
 
 
1edf848
 
 
b764097
 
 
 
 
 
 
 
 
 
 
 
1edf848
b83c881
1edf848
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gradio as gr
import pandas as pd
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoTokenizer
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
import transformers
import torch
import textwrap
import os




from huggingface_hub import login
# Authenticate against the Hugging Face Hub; required because the Llama-2
# weights are gated. Raises KeyError if HF_TOKEN is not set in the environment.
login(token = os.environ['HF_TOKEN'])


# Gated chat model; the same repo id is used for both tokenizer and pipeline.
model = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model)
# Text-generation pipeline used as the LLM backend for RetrievalQA below.
pipeline = transformers.pipeline(
                                    "text-generation", #task
                                    model=model,
                                    tokenizer=tokenizer,
                                    torch_dtype=torch.bfloat16,  # halves memory vs fp32; needs bf16-capable hardware
                                    trust_remote_code=True,
                                    device_map="auto",  # let accelerate place layers on available devices
                                    max_length=1000,  # hard cap on prompt + generated tokens
                                    do_sample=True,
                                    top_k=10,
                                    num_return_sequences=1,
                                    eos_token_id=tokenizer.eos_token_id
)
# Wrap the transformers pipeline so LangChain chains can call it.
# NOTE(review): model_kwargs={'temperature': 0} conflicts with do_sample=True
# above — sampling is still enabled; confirm which behavior is intended.
llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0})

def main(dataset, qs):
  """Answer the question ``qs`` against the most recently uploaded CSV.

  Args:
    dataset: The Gradio File component value (unused here; the vector index
      is built eagerly in ``dataset_change`` when the file is uploaded).
    qs: The user's natural-language question.

  Returns:
    The chain's answer wrapped to 500 columns, or an instructive message
    when no dataset has been indexed yet.
  """
  # Bug fix: ``vectorstore`` is a module-level global created only by
  # dataset_change(); clicking Submit before uploading a CSV used to raise
  # NameError and crash the handler. Guard and return a friendly message.
  if 'vectorstore' not in globals():
    return "Please upload a CSV file and wait for it to be indexed before asking a question."
  chain = RetrievalQA.from_chain_type(
      llm=llm,
      chain_type="stuff",  # concatenate retrieved rows directly into the prompt
      return_source_documents=False,
      retriever=vectorstore.as_retriever(),
  )
  # __call__ accepts the bare query string for single-input chains.
  result = chain(qs)
  wrapped_text = textwrap.fill(result['result'], width=500)
  return wrapped_text


def dataset_change(dataset):
    """Build a FAISS index over a freshly uploaded CSV and preview it.

    Stores the index in the module-level ``vectorstore`` global (consumed by
    ``main``) and returns the first five rows of the file for display.
    """
    global vectorstore
    path = dataset.name
    # CPU-only MiniLM sentence embeddings keep the Space runnable without a GPU.
    embedder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'},
    )
    # One document per CSV row, then embed everything into a FAISS index.
    docs = CSVLoader(path, encoding="utf-8", csv_args={'delimiter': ','}).load()
    vectorstore = FAISS.from_documents(docs, embedder)
    # Small preview for the UI's dataframe widget.
    return pd.read_csv(path).head(5)



# UI layout: left column takes the CSV upload and the question, right column
# shows the answer; a full-width row below previews the uploaded data.
with gr.Blocks() as demo:
    with gr.Row():
      with gr.Column():
        data = gr.File()  # CSV upload; triggers indexing via dataset_change
        qs = gr.Text(label="Input Question")
        submit_btn = gr.Button("Submit")
      with gr.Column():
        answer = gr.Text(label="Output Answer")
    with gr.Row():
      dataframe = gr.Dataframe()  # shows head(5) of the uploaded CSV
    # Submit runs the QA chain; file change (re)builds the vector index.
    submit_btn.click(main, inputs=[data,qs], outputs=[answer])
    data.change(fn=dataset_change,inputs = data,outputs=[dataframe])

demo.launch(debug=True)