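"""Gradio RAG demo: answer questions over uploaded PDF, CSV, and JSON files.

Uploaded documents are split into chunks, embedded with a sentence-transformer
model, indexed with FAISS, and the top matches are passed as context to a
local Llama-2 chat model.
"""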
import gradio as gr
import asyncio
import json
import os
import pickle
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_community.document_loaders import PDFMinerLoader, CSVLoader
from langchain_core.documents import Document
from langchain.text_splitter import SentenceTransformersTokenTextSplitter
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
MODEL_NAME = "TheBloke/Llama-2-7B-GPTQ"
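# NOTE: GPTQ checkpoints rely on GPU quantization kernels (via the
# optimum/auto-gptq stack); loading one with device_map="cpu" as below may
# fail or be very slow. Swap in an unquantized model for pure-CPU use.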
# Initialize tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="cpu")
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,      # assumption: a reasonable cap; the default generation length is far too small for RAG prompts
    return_full_text=False,  # return only the answer, not the echoed prompt
)
# Define prompt template
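# Llama-2-chat expects the system prompt inside <<SYS>> tags and the user
# turn wrapped in [INST] ... [/INST].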
template = """
<s>[INST] <<SYS>>
Use the following information to answer the question at the end.
<</SYS>>
{context}
{question} [/INST]
"""
prompt = PromptTemplate(template=template, input_variables=["context", "question"])
# Path to cache files
CACHE_DIR = 'cache'
os.makedirs(CACHE_DIR, exist_ok=True)
def save_cache(filename, data):
    with open(os.path.join(CACHE_DIR, filename), 'wb') as f:
        pickle.dump(data, f)

def load_cache(filename):
    try:
        with open(os.path.join(CACHE_DIR, filename), 'rb') as f:
            return pickle.load(f)
    except FileNotFoundError:
        return None
async def process_files(file_paths):
    try:
        print("Processing files...")
        docs = []
        for file_path in file_paths:
            if file_path.endswith('.pdf'):
                loader = PDFMinerLoader(file_path)
                # PDF parsing is slow; run it off the event loop.
                docs.extend(await asyncio.to_thread(loader.load))
            elif file_path.endswith('.csv'):
                loader = CSVLoader(file_path)
                docs.extend(loader.load())
            elif file_path.endswith('.json'):
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                # Wrap raw JSON in a Document so split_documents can handle it.
                docs.append(Document(page_content=json.dumps(data)))
        print("Files loaded.")
        # Note: this splitter sizes chunks by the embedding model's token
        # limit (tokens_per_chunk); chunk_size is accepted but not used here.
        text_splitter = SentenceTransformersTokenTextSplitter(chunk_size=1024, chunk_overlap=64)
        texts = text_splitter.split_documents(docs)
        print("Text split into chunks.")
        embeddings = HuggingFaceEmbeddings(
            model_name="thenlper/gte-large",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )
        db = FAISS.from_documents(texts, embeddings)
        print("FAISS index created.")
        # Persist the split documents; FAISS stores are not picklable, so the
        # index is saved with FAISS's own save_local format instead.
        save_cache('texts.pkl', texts)
        db.save_local(os.path.join(CACHE_DIR, 'faiss_index'))
        return texts, db, embeddings
    except Exception as e:
        print(f"Error: {e}")
        return None, None, str(e)
async def query_files(files, question):
    if not files or not question.strip():
        return "Please upload valid files and enter a question."
    print("Starting query processing...")
    # gr.File(type="filepath") yields plain path strings; handle both cases.
    file_paths = [f if isinstance(f, str) else f.name for f in files]
    texts, db, embeddings = await process_files(file_paths)
    if db is None:
        print("Error during processing.")
        # On failure, process_files returns the error message in the third slot.
        return f"Error processing files: {embeddings}"
    print("Processing complete.")
    results = db.similarity_search(question, k=5)
    context = " ".join(result.page_content for result in results)
    prompt_text = prompt.format(context=context, question=question)
    generated_text = text_pipeline(prompt_text)[0]['generated_text']
    return generated_text

def process_and_query(files, question):
    return asyncio.run(query_files(files, question))
with gr.Blocks() as interface:
    gr.Markdown("### Retrieval Augmented Generation (RAG) for LLM Local Trial")
    gr.Markdown(
        "Upload multiple files (PDF, CSV, JSON) and ask a question. "
        "The app will generate an answer based on the content of the uploaded files."
    )
    with gr.Row():
        question_input = gr.Textbox(label="Enter your question", lines=3)
        files_input = gr.File(label="Upload Files", type="filepath", file_count="multiple")
    submit_button = gr.Button("Submit")
    output_text = gr.Textbox(label="LLM Response", lines=8)
    submit_button.click(process_and_query, inputs=[files_input, question_input], outputs=output_text)

if __name__ == "__main__":
    interface.launch()