# Hugging Face Space — status: Running
| import os | |
| import gradio as gr | |
| from dotenv import load_dotenv | |
| from huggingface_hub import InferenceClient | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
# Log the Gradio version at startup — useful when debugging Space builds,
# since chat message format differs between Gradio major versions.
print(f"Gradio version: {gr.__version__}")

# Load environment variables from a local .env file (no-op on Spaces,
# where HF_TOKEN is injected as a repository secret).
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
# Print only whether the token exists — never the token value itself.
print("HF_TOKEN loaded:", bool(HF_TOKEN))

# Serverless HF Inference client pinned to a small instruct model;
# the model is fixed here so chat calls below need no model argument.
client = InferenceClient(
    model="meta-llama/Llama-3.2-1B-Instruct",
    token=HF_TOKEN
)

# Module-level FAISS index shared by the handlers below.
# None means "no PDF loaded" and the chat runs in general mode.
vector_store = None
def process_pdf(pdf_file):
    """Load an uploaded PDF, chunk it, and build the global FAISS index.

    Args:
        pdf_file: Gradio ``gr.File`` payload — either a plain filepath
            string (``type="filepath"``, the default in recent Gradio) or
            a tempfile-like object exposing ``.name``.

    Returns:
        A human-readable status string for the UI.
    """
    global vector_store
    if pdf_file is None:
        return "Please upload a PDF file."

    # Bug fix: gr.File commonly hands back a str path, on which .name
    # would raise AttributeError. Accept both payload shapes.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    # Load the PDF (one Document per page).
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    # Split into overlapping chunks sized for the small embedding model.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50
    )
    chunks = text_splitter.split_documents(documents)

    # Guard: scanned/image-only PDFs can produce zero text chunks, and
    # FAISS.from_documents would fail on an empty list.
    if not chunks:
        return "⚠️ No text could be extracted from this PDF."

    # Embed locally with a small sentence-transformers model and index.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vector_store = FAISS.from_documents(chunks, embeddings)
    return f"✅ PDF processed! {len(chunks)} chunks created. You can now ask questions."
def get_bot_response(message, history):
    """Answer *message* with the HF chat model, grounded on the PDF if one is indexed.

    Args:
        message: The user's latest chat message.
        history: Prior chat turns (accepted for interface compatibility; unused).

    Returns:
        The assistant's reply text.
    """
    global vector_store

    # Retrieve the three most relevant chunks when a PDF has been indexed.
    retrieved = ""
    if vector_store is not None:
        matches = vector_store.similarity_search(message, k=3)
        retrieved = "\n\n".join(doc.page_content for doc in matches)

    # Ground the system prompt on retrieved context when available.
    system_prompt = "You are a helpful AI assistant."
    if retrieved:
        system_prompt = (
            "You are a helpful AI assistant. Answer questions based on the provided context.\n"
            "If the answer is not in the context, say so.\n"
            "Context:\n"
            f"{retrieved}"
        )

    reply = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": message},
        ],
        max_tokens=500,
        temperature=0.7,
    )
    return reply.choices[0].message.content
def clear_pdf():
    """Drop the indexed PDF so subsequent chat runs in general mode.

    Returns:
        A status string confirming the reset, for display in the UI.
    """
    global vector_store
    # Resetting to None is the sentinel the chat handler checks for.
    vector_store = None
    return "PDF cleared. Chat is now in general mode."
# Gradio UI: left column manages the PDF lifecycle, right column hosts the chat.
with gr.Blocks(title="🤖 AI Chatbot with PDF") as demo:
    gr.Markdown("# 🤖 AI Chatbot with PDF Support")
    gr.Markdown("Upload a PDF to chat about its contents, or just chat normally.")
    with gr.Row():
        with gr.Column(scale=1):
            # PDF controls: upload, index, clear, and a read-only status line.
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            upload_btn = gr.Button("Process PDF", variant="primary")
            clear_btn = gr.Button("Clear PDF")
            status = gr.Textbox(label="Status", interactive=False)
        with gr.Column(scale=2):
            # NOTE(review): relies on the Gradio default Chatbot format accepting
            # gr.ChatMessage entries (messages format) — confirm against the
            # installed Gradio version; older versions need type="messages".
            chatbot_display = gr.Chatbot(label="Chat", height=400)
            msg_input = gr.Textbox(label="Your message", placeholder="Type your message here...")
            clear_chat_btn = gr.Button("Clear Chat")

    # Chat history state - using messages format for Gradio 6.x
    chat_history = gr.State([])

    def respond(message, history):
        """Handle a submit: call the model, append the turn, clear the input box."""
        # Empty message: clear the textbox and leave history untouched.
        if not message:
            return "", history, history
        bot_response = get_bot_response(message, history)
        # Gradio 6.x requires messages format with role and content
        history = history + [
            gr.ChatMessage(role="user", content=message),
            gr.ChatMessage(role="assistant", content=bot_response)
        ]
        # Outputs map to (msg_input, chatbot_display, chat_history).
        return "", history, history

    def clear_chat():
        """Reset both the visible chat and the stored history."""
        return [], []

    # Event wiring: Enter in the textbox sends; buttons handle the resets.
    msg_input.submit(respond, [msg_input, chat_history], [msg_input, chatbot_display, chat_history])
    clear_chat_btn.click(clear_chat, outputs=[chatbot_display, chat_history])
    upload_btn.click(process_pdf, inputs=[pdf_input], outputs=[status])
    clear_btn.click(clear_pdf, outputs=[status])

demo.launch()