Spaces: Build error
```python
import gradio as gr
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
# Use LlamaIndex's native HuggingFace embedding rather than the LangChain
# wrapper; it plugs straight into VectorStoreIndex without an adapter.
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
model_url = 'https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf'

llm = LlamaCPP(
    # You can pass in the URL to a GGUF model to download it automatically
    model_url=model_url,
    temperature=0.1,
    max_new_tokens=256,
    context_window=2048,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__(); set n_gpu_layers to at least 1 to use
    # the GPU (ignored on CPU-only builds of llama-cpp-python)
    model_kwargs={"n_gpu_layers": 1},
    # NOTE: these helpers emit Llama 2 [INST] markup, not the Llama 3 chat
    # template this model was trained on; see the sketch just below
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
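
# A minimal sketch, assuming this GGUF expects Llama 3 chat markup.
# llama3_messages_to_prompt is a hypothetical helper, not part of
# llama_utils; pass it as messages_to_prompt=llama3_messages_to_prompt
# above if the Llama 2-style prompts produce malformed answers.
def llama3_messages_to_prompt(messages):
    prompt = "<|begin_of_text|>"
    for m in messages:
        prompt += (
            f"<|start_header_id|>{m.role.value}<|end_header_id|>\n\n"
            f"{m.content}<|eot_id|>"
        )
    prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
    return prompt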
# Initialize the embedding model
embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
def initialize_index():
    """Initialize the vector store index from PDF files in the data directory"""
    # Load documents from the data directory
    loader = SimpleDirectoryReader(
        input_dir="data",
        required_exts=[".pdf"],
    )
    documents = loader.load_data()

    # Create index
    index = VectorStoreIndex.from_documents(
        documents,
        embed_model=embeddings,
    )

    # Return a query engine backed by the LlamaCPP model
    return index.as_query_engine(llm=llm)

# Initialize the query engine at startup
query_engine = initialize_index()
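# NOTE (assumption about the failure mode): SimpleDirectoryReader raises if
# the data/ directory is missing or contains no PDFs, so this module-level
# call will crash the app at startup. Make sure at least one PDF is
# committed to data/ in the Space repository.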
def process_query(
    message: str,
    history: list[tuple[str, str]],
) -> str:
    """Process a query using the RAG system"""
    try:
        # Get response from the query engine
        response = query_engine.query(
            message,
            # streaming=True
        )
        return str(response)
    except Exception as e:
        return f"Error processing query: {str(e)}"
# Create the Gradio interface
demo = gr.ChatInterface(
    process_query,
    title="PDF Question Answering with RAG + Llama",
    description="Ask questions about the content of the loaded PDF documents using a Llama model",
    # undo_btn / clear_btn were removed from ChatInterface in Gradio 5,
    # hence left commented out:
    # undo_btn="Delete Previous",
    # clear_btn="Clear",
)

if __name__ == "__main__":
    demo.launch(debug=True)
```
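
Spaces build errors with this stack are most often dependency-resolution or compile failures (llama-cpp-python is built from source during the image build). A minimal requirements.txt sketch matching the imports above; the package names are the published PyPI distributions, but treat any version pinning as something to work out for your Space:

```text
gradio
llama-index-core
llama-index-llms-llama-cpp
llama-index-embeddings-huggingface
llama-cpp-python
```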