Nolsafan committed on
Commit
8bed30e
·
verified ·
1 Parent(s): 8d8d5a2

Delete rag_demo.py

Browse files
Files changed (1) hide show
  1. rag_demo.py +0 -84
rag_demo.py DELETED
@@ -1,84 +0,0 @@
1
- from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
2
- from langchain_community.vectorstores import FAISS
3
- from langchain_text_splitters import RecursiveCharacterTextSplitter
4
- from langchain_core.prompts import ChatPromptTemplate
5
- from langchain_core.runnables import RunnablePassthrough
6
- from langchain_core.output_parsers import StrOutputParser
7
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
8
- import torch
9
-
10
# Step A: Embedding model - maps text to vectors so we can search by meaning.
# BAAI/bge-small-en-v1.5 is a small, fast model pulled from the Hugging Face hub.
embed_model_id = "BAAI/bge-small-en-v1.5"

# Resolve the device once up front: use the GPU when PyTorch can see one,
# otherwise fall back to CPU.
_embed_device = "cuda" if torch.cuda.is_available() else "cpu"
embeddings = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs={"device": _embed_device},
)
17
-
18
# Step B: The knowledge base. Swap in your own material, or extract text from
# files first (e.g. PyPDF2 for PDFs) before indexing.
texts = [
    "Kragujevac is a city in central Serbia founded in the 15th century.",
    "The main industry in Kragujevac includes automotive manufacturing.",
    "Famous landmarks: The Šumarice Memorial Park and the Old Foundry Museum.",
]

# Chunk the raw strings so retrieval returns focused passages rather than
# whole documents: 500-character windows with an 80-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=80)
docs = text_splitter.create_documents(texts)

# Step C: Index the chunks in a FAISS vector store, then expose it as a
# retriever that returns the 3 nearest chunks for each query.
vectorstore = FAISS.from_documents(docs, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
33
-
34
# Step D: The generator LLM. Qwen2.5-0.5B-Instruct is deliberately tiny so it
# loads fast and fits in roughly 3-5 GB of RAM on a CPU-only box. If the hub
# asks for authentication, pass hf_token="your_token_here" to from_pretrained.
model_id = "Qwen/Qwen2.5-0.5B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# CPU-only load: force device_map="cpu" and keep full float32 precision.
# Important: no BitsAndBytesConfig / load_in_4bit / bfloat16 here - those
# quantization paths assume a GPU.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cpu",
    torch_dtype=torch.float32,
)
47
-
48
# Wrap model + tokenizer in a transformers text-generation pipeline.
# Sampling is enabled (temperature 0.7) and each answer is capped at
# 200 freshly generated tokens.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    temperature=0.7,
    do_sample=True,
)

# Adapt the transformers pipeline to LangChain's LLM interface so it can sit
# inside a runnable chain.
llm = HuggingFacePipeline(pipeline=pipe)
58
-
59
- # Step E: Define the prompt template (instructions for the AI)
60
- template = """You are a helpful assistant. Use only the provided context to answer.
61
- If unsure, say "I don't know."
62
-
63
- Context: {context}
64
-
65
- Question: {question}
66
-
67
- Answer:"""
68
- prompt = ChatPromptTemplate.from_template(template)
69
-
70
- # Step F: Chain it all together (retrieval + prompt + LLM)
71
def format_docs(docs):
    """Render retrieved documents as a single context string.

    Each document's ``page_content`` is emitted verbatim; consecutive
    documents are separated by one blank line so the LLM can tell them apart.
    """
    chunks = [document.page_content for document in docs]
    return "\n\n".join(chunks)
73
-
74
# Compose the RAG pipeline: retrieve -> format context -> fill prompt ->
# generate -> strip to plain text. RunnablePassthrough forwards the user's
# question unchanged while the retriever supplies the matching chunks.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Step G: Smoke-test the chain with a question the corpus can answer.
question = "What are some landmarks in Kragujevac?"
print("Question:", question)
print("Answer:", rag_chain.invoke(question))