| import gradio as gr |
| from langchain_community.document_loaders import PyPDFLoader, TextLoader |
| from langchain_huggingface import HuggingFaceEmbeddings |
| from langchain_huggingface.llms import HuggingFacePipeline |
| from langchain_community.vectorstores import Chroma |
| from langchain_text_splitters import RecursiveCharacterTextSplitter |
| from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer |
|
|
| |
# --- Language model ----------------------------------------------------------
# Load a small chat model locally and wrap it so LangChain can drive it.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
_model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Cap generation length so answers stay short and fast on CPU.
_text_gen = pipeline(
    "text-generation",
    model=_model,
    tokenizer=_tokenizer,
    max_new_tokens=200,
)

# Module-level handle consumed by ask_bot().
llm = HuggingFacePipeline(pipeline=_text_gen)
|
|
# Sentence-embedding model used to vectorize document chunks.
# NOTE(review): bare "all-MiniLM-L6-v2" is resolved by sentence-transformers —
# confirm the hub alias stays valid (canonical id is
# "sentence-transformers/all-MiniLM-L6-v2").
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Global vector store: populated by process_file(), queried by ask_bot().
# None until the user indexes a file.
vector_db = None
|
|
def process_file(file):
    """Index an uploaded PDF or text file into the global vector store.

    Args:
        file: Gradio file wrapper; ``file.name`` is the temp path on disk.
            May be ``None`` if the user clicks Index without uploading.

    Returns:
        A status string shown in the UI.
    """
    global vector_db
    # Guard: clicking "Index" before uploading passes None and would
    # otherwise raise AttributeError on file.name.
    if file is None:
        return "⚠️ No file uploaded."
    # Case-insensitive extension check so '.PDF' uploads are parsed as PDFs
    # instead of falling through to the plain-text loader.
    is_pdf = file.name.lower().endswith(".pdf")
    loader = PyPDFLoader(file.name) if is_pdf else TextLoader(file.name)
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=100
    ).split_documents(loader.load())
    vector_db = Chroma.from_documents(documents=chunks, embedding=embeddings)
    return "✅ Indexed successfully!"
|
|
def ask_bot(question):
    """Answer *question* using the indexed document as context.

    Retrieves the two most similar chunks from the vector store, builds a
    TinyLlama chat prompt, and returns only the assistant's answer text.

    Args:
        question: The user's question string.

    Returns:
        The model's answer (or a warning string if nothing is indexed).
    """
    # Explicit None check: vector-store objects can evaluate falsy when the
    # underlying collection is empty, which would wrongly report "no file".
    if vector_db is None:
        return "⚠️ Upload a file first."
    docs = vector_db.similarity_search(question, k=2)
    context = "\n".join(d.page_content for d in docs)
    prompt = (
        f"<|system|>\nUse the context to answer.</s>\n"
        f"<|user|>\nContext: {context}\n\nQuestion: {question}</s>\n"
        f"<|assistant|>"
    )
    # HuggingFacePipeline echoes the prompt plus the completion by default
    # (return_full_text=True); keep only the text after the assistant tag so
    # the UI shows just the answer.
    output = llm.invoke(prompt)
    return output.split("<|assistant|>")[-1].strip()
|
|
# --- Gradio UI ----------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# ⚡ Hugging Face RAG")

    # Upload + indexing controls.
    file_input = gr.File()
    btn_index = gr.Button("Index")
    status = gr.Textbox()

    # Question/answer controls.
    chat_input = gr.Textbox(label="Ask a Question")
    chat_output = gr.Textbox(label="Answer")

    # Wire events: the button indexes the uploaded file; pressing Enter in
    # the question box runs retrieval + generation.
    btn_index.click(process_file, inputs=[file_input], outputs=[status])
    chat_input.submit(ask_bot, inputs=[chat_input], outputs=[chat_output])

demo.launch()