Nolsafan committed on
Commit
8bed30e
·
verified ·
1 Parent(s): 8d8d5a2

Delete rag_demo.py

Browse files
Files changed (1) hide show
  1. rag_demo.py +0 -84
rag_demo.py DELETED
@@ -1,84 +0,0 @@
1
- from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
2
- from langchain_community.vectorstores import FAISS
3
- from langchain_text_splitters import RecursiveCharacterTextSplitter
4
- from langchain_core.prompts import ChatPromptTemplate
5
- from langchain_core.runnables import RunnablePassthrough
6
- from langchain_core.output_parsers import StrOutputParser
7
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
8
- import torch
9
-
10
# Step A: Embedding model - maps text to vectors so we can search by meaning.
# BAAI/bge-small-en-v1.5 is a small, fast model pulled from the Hugging Face hub.
embed_model_id = "BAAI/bge-small-en-v1.5"

# Resolve the device once up front: use the GPU when PyTorch can see one,
# otherwise fall back to CPU.
_embed_device = "cuda" if torch.cuda.is_available() else "cpu"
embeddings = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs={"device": _embed_device},
)
17
-
18
# Step B: The knowledge base. Swap in your own material, or extract text from
# files first (e.g. PyPDF2 for PDFs) before indexing.
texts = [
    "Kragujevac is a city in central Serbia founded in the 15th century.",
    "The main industry in Kragujevac includes automotive manufacturing.",
    "Famous landmarks: The Šumarice Memorial Park and the Old Foundry Museum.",
]

# Chunk the raw strings so retrieval returns focused passages rather than
# whole documents: 500-character windows with an 80-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=80)
docs = text_splitter.create_documents(texts)

# Step C: Index the chunks in a FAISS vector store, then expose it as a
# retriever that returns the 3 nearest chunks for each query.
vectorstore = FAISS.from_documents(docs, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
33
-
34
# Step D: The generator LLM. Qwen2.5-0.5B-Instruct is deliberately tiny so it
# loads fast and fits in roughly 3-5 GB of RAM on a CPU-only box. If the hub
# asks for authentication, pass hf_token="your_token_here" to from_pretrained.
model_id = "Qwen/Qwen2.5-0.5B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# CPU-only load: force device_map="cpu" and keep full float32 precision.
# Important: no BitsAndBytesConfig / load_in_4bit / bfloat16 here - those
# quantization paths assume a GPU.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cpu",
    torch_dtype=torch.float32,
)
47
-
48
# Wrap model + tokenizer in a transformers text-generation pipeline.
# Sampling is enabled (temperature 0.7) and each answer is capped at
# 200 freshly generated tokens.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    temperature=0.7,
    do_sample=True,
)

# Adapt the transformers pipeline to LangChain's LLM interface so it can sit
# inside a runnable chain.
llm = HuggingFacePipeline(pipeline=pipe)
58
-
59
- # Step E: Define the prompt template (instructions for the AI)
60
- template = """You are a helpful assistant. Use only the provided context to answer.
61
- If unsure, say "I don't know."
62
-
63
- Context: {context}
64
-
65
- Question: {question}
66
-
67
- Answer:"""
68
- prompt = ChatPromptTemplate.from_template(template)
69
-
70
- # Step F: Chain it all together (retrieval + prompt + LLM)
71
def format_docs(docs):
    """Render retrieved documents as a single context string.

    Each document's ``page_content`` is emitted verbatim; consecutive
    documents are separated by one blank line so the LLM can tell them apart.
    """
    chunks = [document.page_content for document in docs]
    return "\n\n".join(chunks)
73
-
74
# Compose the RAG pipeline: retrieve -> format context -> fill prompt ->
# generate -> strip to plain text. RunnablePassthrough forwards the user's
# question unchanged while the retriever supplies the matching chunks.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Step G: Smoke-test the chain with a question the corpus can answer.
question = "What are some landmarks in Kragujevac?"
print("Question:", question)
print("Answer:", rag_chain.invoke(question))