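# Streamlit app: retrieval-augmented QA over a local PDF using Chroma,
# Instructor embeddings, and a GPTQ-quantized Llama-2 13B chat model.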
import streamlit as st
import os
import textwrap
import torch
import chromadb
import langchain
import openai
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader, UnstructuredPDFLoader, YoutubeLoader
from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import OpenAI, HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from auto_gptq import AutoGPTQForCausalLM  # needed for the quantized model loaded below
from transformers import AutoTokenizer, pipeline, logging, TextStreamer
from langchain.document_loaders.image import UnstructuredImageLoader
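
# Quick widget to confirm the Streamlit UI is rendering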
x = st.slider('Select a value')
st.write(x, 'squared is', x * x)
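
# Show the working directory and its contents (useful for debugging paths on the host)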
current_working_directory = os.getcwd()
print(current_working_directory)
st.write('current dir:', current_working_directory)
arr = os.listdir('.')
st.write('dir contents:', arr)
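
# Helper to wrap long answers when printing to the console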
def print_response(response: str):
    print("\n".join(textwrap.wrap(response, width=100)))
pdf_loader = UnstructuredPDFLoader("./pdfs/Predicting issue types on GitHub.pdf")
pdf_pages = pdf_loader.load_and_split()
st.write('total pages from PDFs:', len(pdf_pages))
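
# Split the pages into overlapping chunks so each one fits the embedding model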
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=512)
texts = text_splitter.split_documents(pdf_pages)
st.write('total chunks from pages:', len(texts))
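
# Embed the chunks with an Instructor model and index them in Chroma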
st.write('loading chunks into vector db')
model_name = "hkunlp/instructor-large"
hf_embeddings = HuggingFaceInstructEmbeddings(model_name=model_name)
db = Chroma.from_documents(texts, hf_embeddings)
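
# Load the tokenizer and the quantized Llama-2 13B chat weights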
st.write('loading tokenizer')
# AutoGPTQForCausalLM below expects GPTQ weights, so use the GPTQ repo (not the GGUF one)
model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
model_basename = "model"
use_triton = False
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
st.write('loading LLM')
model = AutoGPTQForCausalLM.from_quantized(
    model_name_or_path,
    model_basename=model_basename,
    use_safetensors=True,
    trust_remote_code=True,
    device=DEVICE,
    use_triton=use_triton,
    quantize_config=None,
)
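
# Wrap the model in a text-generation pipeline that streams tokens as they are produced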
st.write('setting up the chain')
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
text_pipeline = pipeline(task='text-generation', model=model, tokenizer=tokenizer, streamer=streamer)
llm = HuggingFacePipeline(pipeline=text_pipeline)
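
# Build the prompt in the Llama-2 chat format: [INST] <<SYS>> system <</SYS>> user [/INST]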
def generate_prompt(prompt, sys_prompt):
    return f"[INST] <<SYS>> {sys_prompt} <</SYS>> {prompt} [/INST]"

sys_prompt = "Use the following piece of context to answer the question in less than 20 words."
template = generate_prompt(
    """
{context}
Question: {question}
""",
    sys_prompt,
)
prompt = PromptTemplate(template=template, input_variables=["context", "question"])
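
# Assemble the RetrievalQA chain: retrieve the 2 most similar chunks and "stuff" them into the prompt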
chain_type_kwargs = {"prompt": prompt}
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)
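
# Run a few sample questions through the chain and display the answers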
st.write('READY!!!')
q1 = "What did the author work on?"
q2 = "Where did the author study?"
q3 = "What did the author do?"
result = qa_chain(q1)
st.write('question:', q1, 'result:', result)
result = qa_chain(q2)
st.write('question:', q2, 'result:', result)
result = qa_chain(q3)
st.write('question:', q3, 'result:', result)