Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.document_loaders import PyMuPDFLoader | |
| from langchain_text_splitters import CharacterTextSplitter | |
| from langchain.chains import RetrievalQA | |
| from langchain_community.llms import HuggingFacePipeline | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
def create_qa_system(
    pdf_path: str = "file.pdf",
    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
    llm_model: str = "google/flan-t5-small",
    top_k: int = 2,
):
    """Build a retrieval question-answering chain over a single PDF.

    The PDF is loaded with ``PyMuPDFLoader``, split with
    ``CharacterTextSplitter(chunk_size=300, chunk_overlap=50)``, embedded
    into a FAISS vector store, and wired to a local seq2seq model through
    ``RetrievalQA`` using the "stuff" chain type.

    Args:
        pdf_path: Path of the PDF to index.
        embedding_model: Model name passed to ``HuggingFaceEmbeddings``.
        llm_model: Model name used for both the tokenizer and the seq2seq
            model that generates answers.
        top_k: Number of chunks the retriever fetches per query.

    Returns:
        The chain produced by ``RetrievalQA.from_chain_type``.

    Raises:
        gr.Error: On any failure (missing file, empty PDF, no extractable
            text, model loading errors). The original exception is
            attached as ``__cause__``.
    """
    try:
        # Validate PDF
        if not os.path.exists(pdf_path):
            raise FileNotFoundError("Upload PDF via Files tab")

        # Process PDF
        documents = PyMuPDFLoader(pdf_path).load()
        if not documents:
            raise ValueError("PDF is empty or corrupted")

        # Split text
        text_splitter = CharacterTextSplitter(
            chunk_size=300,
            chunk_overlap=50,
        )
        texts = text_splitter.split_documents(documents)
        # Pages without a text layer (e.g. scanned images) produce no
        # chunks; fail here with a clear message instead of letting the
        # vector store choke on an empty embedding list.
        if not texts:
            raise ValueError("PDF contains no extractable text")

        # Create embeddings
        embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

        # Build vector store
        db = FAISS.from_documents(texts, embeddings)

        # Initialize local model with LangChain wrapper
        tokenizer = AutoTokenizer.from_pretrained(llm_model)
        model = AutoModelForSeq2SeqLM.from_pretrained(llm_model)
        # NOTE(review): temperature presumably has no effect unless
        # do_sample=True is also set -- confirm against the transformers
        # generation docs before relying on it.
        pipe = pipeline(
            "text2text-generation",
            model=model,
            tokenizer=tokenizer,
            max_length=128,
            temperature=0.2,
            device_map="auto",
        )
        llm = HuggingFacePipeline(pipeline=pipe)

        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(search_kwargs={"k": top_k}),
        )
    except Exception as e:
        # Surface every failure as a Gradio error while keeping the
        # original exception chained for debugging.
        raise gr.Error(f"Initialization failed: {str(e)}") from e
# Build the QA chain once at import time so every chat turn reuses it.
# A failure is logged to stdout and then re-raised, which stops startup.
try:
    qa = create_qa_system()
except Exception as init_error:
    print(f"Fatal error: {str(init_error)}")
    raise
def chat_response(message, history):
    """Answer one chat turn by querying the module-level ``qa`` chain.

    Args:
        message: The user's latest message.
        history: Earlier conversation turns passed in by the chat UI;
            not used by this handler.

    Returns:
        The chain's ``"result"`` value; a short prompt when the message is
        blank; or a warning string containing the first 100 characters of
        the error text if the query fails.
    """
    # A blank query has nothing to retrieve against, so skip the model call.
    if not message or not str(message).strip():
        return "Please enter a question."
    try:
        # invoke() replaces the deprecated direct call on the chain object.
        response = qa.invoke({"query": message})
        return response["result"]
    except Exception as e:
        # Log the full error server-side; show the user a truncated version.
        print(f"Error during query: {str(e)}")
        return f"⚠️ Error: {str(e)[:100]}"
# Serve the chat UI, routing each user message through chat_response.
gr.ChatInterface(fn=chat_response).launch()