"""Study Assistant — a Gradio RAG app over local study materials (runs as a Hugging Face Space)."""
import os

# Third-party: Gradio UI, LangChain RAG stack (loaders/splitter/FAISS/chain),
# and Hugging Face Transformers for the local Flan-T5 model.
import gradio as gr
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline, AutoTokenizer
def load_documents(file_path="study_materials"):
    """Load every PDF and plain-text document found directly in *file_path*.

    Args:
        file_path: Directory scanned (non-recursively) for ``.pdf`` and
            ``.txt`` files.

    Returns:
        list: LangChain ``Document`` objects from all matching files; an
        empty list when the directory contains no supported files.
    """
    documents = []
    for filename in os.listdir(file_path):
        path = os.path.join(file_path, filename)
        # Match extensions case-insensitively so e.g. "NOTES.TXT" or
        # "slides.PDF" are not silently skipped.
        suffix = os.path.splitext(filename)[1].lower()
        if suffix == ".pdf":
            documents.extend(PyMuPDFLoader(path).load())
        elif suffix == ".txt":
            documents.extend(TextLoader(path).load())
    return documents
def create_qa_system():
    """Build the retrieval-QA chain over the local study materials.

    Pipeline: load documents -> split into overlapping chunks -> embed with
    MiniLM -> index in FAISS -> answer with a local Flan-T5 pipeline fed the
    top-3 retrieved chunks.

    Returns:
        RetrievalQA: chain whose ``invoke({"query": ...})`` result contains
        both the answer and the retrieved source documents.

    Raises:
        gr.Error: wraps any failure (no materials, model download error, ...)
            so Gradio can surface it in the UI.
    """
    try:
        # Load documents
        documents = load_documents()
        if not documents:
            # NOTE(review): original text had a mojibake char ("๐"); restored
            # to the books emoji used for source citations below.
            raise ValueError("📚 No study materials found")
        # Text splitting: ~1100-char chunks with 200-char overlap, split on
        # blank lines, so paragraph context survives chunk boundaries.
        text_splitter = CharacterTextSplitter(
            chunk_size=1100,
            chunk_overlap=200,
            separator="\n\n"
        )
        texts = text_splitter.split_documents(documents)
        # Embeddings: small, CPU-friendly sentence-transformer.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        # Vector store
        db = FAISS.from_documents(texts, embeddings)
        # LLM setup with proper LangChain wrapper; device=-1 forces CPU.
        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
        pipe = pipeline(
            "text2text-generation",
            model="google/flan-t5-base",
            tokenizer=tokenizer,
            max_length=600,
            temperature=0.7,
            do_sample=True,
            top_k=50,
            device=-1
        )
        # Wrap pipeline in LangChain component
        llm = HuggingFacePipeline(pipeline=pipe)
        # Create QA chain; return_source_documents lets ask_question cite the
        # files each answer came from.
        return RetrievalQA.from_llm(
            llm=llm,
            retriever=db.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=True
        )
    except Exception as e:
        # Chain the original exception so the real cause stays in the traceback.
        raise gr.Error(f"Error: {str(e)}") from e
# Initialize system once at import time. If the index or models cannot be
# built there is nothing useful to serve, so log the failure and re-raise
# to stop the app from starting (fail fast).
try:
    qa = create_qa_system()
except Exception as e:
    print(f"Startup failed: {str(e)}")
    raise
def ask_question(question, history):
    """Chat callback: answer *question* from the indexed study materials.

    Args:
        question: The user's message.
        history: Prior chat turns (required by gr.ChatInterface; unused).

    Returns:
        str: The model's answer followed by a deduplicated, sorted list of
        source file paths, or a truncated error message on failure.
    """
    try:
        result = qa.invoke({"query": question})
        answer = result["result"]
        # Deduplicate via a set, then sort so the citation order is stable
        # across calls (set iteration order is arbitrary).
        sources = sorted({doc.metadata['source'] for doc in result['source_documents']})
        # NOTE(review): original text had a mojibake char ("๐"); restored to 📚.
        return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
    except Exception as e:
        # Keep the chat UI responsive on failure; truncate long messages.
        return f"Error: {str(e)[:150]}"
# Launch the chat UI. ChatInterface calls ask_question with (message, history);
# launch() blocks and serves the app (the Hugging Face Space entry point).
gr.ChatInterface(
    ask_question,
    title="Study Assistant",
    description="Upload PDF/TXT files in 'study_materials' folder and ask questions!",
    theme="soft"
).launch()