Spaces:
Sleeping
Sleeping
| import os | |
| import requests | |
| import gradio as gr | |
| from groq import Groq | |
| # LangChain components | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
# --- 1. SETUP & API KEYS ---
# The Groq key is taken from the environment; it is None when the variable
# is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Google Drive share links of the PDFs that make up the knowledge base.
GDRIVE_LINKS = [
    "https://drive.google.com/file/d/12bS7b-Q3qdbnwCRcTynXjMj1IyKFzBJl/view?usp=sharing",
]
def _extract_gdrive_file_id(url):
    """Return the file ID embedded in a Google Drive URL, or None if absent."""
    if not isinstance(url, str):
        return None
    # Share links look like ".../file/d/<id>/view?..."; direct links carry
    # the ID in an "id=<id>" query parameter.
    for marker in ("/d/", "id="):
        _, found, tail = url.partition(marker)
        if found:
            break
    else:
        return None
    # Cut the ID off at the first path/query/fragment delimiter.
    for separator in "/?&#":
        tail = tail.partition(separator)[0]
    return tail or None


def download_gdrive_pdf(url, output_path, timeout=30):
    """Download a PDF shared on Google Drive to ``output_path``.

    Args:
        url: Google Drive link containing the file ID, either as
            ``.../d/<id>/...`` or as an ``id=<id>`` query parameter.
        output_path: Local path the PDF bytes are written to.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        True if a PDF was downloaded and written, False otherwise. Failures
        are reported with ``print`` and never raised to the caller.
    """
    file_id = _extract_gdrive_file_id(url)
    if file_id is None:
        print(f"Could not find a Google Drive file ID in: {url!r}")
        return False

    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    try:
        response = requests.get(download_url, timeout=timeout)
        if response.status_code != 200:
            print(f"Download failed with HTTP {response.status_code}: {download_url}")
            return False

        payload = response.content
        # A 200 response is not necessarily the file: Drive can answer with an
        # HTML page (e.g. a confirmation or sign-in page). Saving that as a
        # .pdf would only fail later in the PDF loader, so reject it here.
        if b"%PDF" not in payload[:1024]:
            print(f"Response is not a PDF document: {download_url}")
            return False

        with open(output_path, "wb") as pdf_file:
            pdf_file.write(payload)
    except (requests.RequestException, OSError) as exc:
        # Network errors and file-system errors are expected failure modes;
        # anything else (including KeyboardInterrupt) is allowed to propagate.
        print(f"Download failed for {download_url}: {exc}")
        return False
    return True
# --- 2. KNOWLEDGE BASE INITIALIZATION ---
# Runs at import time: downloads every configured PDF, splits it into
# overlapping chunks and builds the FAISS index queried by respond().
print("Initializing Knowledge Base...")
all_chunks = []
text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=150)
for i, link in enumerate(GDRIVE_LINKS):
    filename = f"doc_{i}.pdf"
    if not download_gdrive_pdf(link, filename):
        # Keep going with the remaining documents, but make the gap visible.
        print(f"Warning: could not download document {i}: {link}")
        continue
    loader = PyPDFLoader(filename)
    all_chunks.extend(text_splitter.split_documents(loader.load()))

# An index cannot be built from zero documents; fail with an actionable
# message instead of an obscure error from inside the vector store.
if not all_chunks:
    raise RuntimeError(
        "Knowledge base is empty: no document could be downloaded or parsed. "
        "Check GDRIVE_LINKS and the sharing permissions of the files."
    )

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vector_db = FAISS.from_documents(all_chunks, embeddings)
print("System Ready.")
| # --- 3. RAG LOGIC --- | |
def respond(message, history):
    """Answer ``message`` using only text retrieved from the document index.

    Args:
        message: The user's question.
        history: Earlier conversation turns; accepted but not used here.

    Returns:
        The text content of the model's first completion choice.
    """
    # Retrieve the five chunks most similar to the question and merge them
    # into one context string, separated by blank lines.
    matches = vector_db.similarity_search(message, k=5)
    context = "\n\n".join(match.page_content for match in matches)

    system_prompt = f"""
You are a professional Knowledge Assistant.
1. Answer the question using ONLY the provided context.
2. If the answer is not in the context, say: "I'm sorry, I couldn't find that in the documents."
3. Be concise and factual.
CONTEXT:
{context}
"""

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    reply = client.chat.completions.create(
        messages=conversation,
        model="llama-3.3-70b-versatile",
        temperature=0.1,
    )
    return reply.choices[0].message.content
# --- 4. MODERN UI ---
# Stylesheet: hides the footer, sets the page background and gives the chat
# panel rounded corners with a soft shadow.
custom_css = """
footer {visibility: hidden}
.gradio-container { background-color: #fcfcfc; }
#chatbot-container { border-radius: 12px; box-shadow: 0 2px 10px rgba(0,0,0,0.05); }
"""

# Neither theme nor CSS is passed to gr.Blocks(); both are supplied to
# launch() at the bottom of the file.
with gr.Blocks() as demo:
    gr.Markdown("# 📑 Intellect-Doc AI")
    gr.Markdown("Ask questions based on your specialized document library.")

    # No explicit type="messages" on the chatbot: the default is relied upon.
    history_panel = gr.Chatbot(height=550, elem_id="chatbot-container")
    question_box = gr.Textbox(
        placeholder="Ask me anything...",
        container=False,
        scale=7,
    )
    chatbot = gr.ChatInterface(
        fn=respond,
        chatbot=history_panel,
        textbox=question_box,
    )
if __name__ == "__main__":
    # Theme and CSS are handed to launch() (rather than gr.Blocks) to avoid
    # the UserWarning.
    soft_indigo = gr.themes.Soft(primary_hue="indigo")
    demo.launch(theme=soft_indigo, css=custom_css)