robertolofaro committed on
Commit
0a65ae5
·
verified ·
1 Parent(s): e90a9cd

Upload 5 files

Browse files
qa_common.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import datetime
4
+ from llama_cpp import Llama
5
+
6
+ # ====================== COMMON CONFIG & PROMPT ======================
7
+ SYSTEM_PROMPT = """You are the reference expert for the articles contained in this database, all extracted from the website robertolofaro.com, and all focused on change.
8
+ #Your Mission:
9
+ When a user asks a question, your goal is to provide a structured response based ONLY on the articles provided in your training. Do not provide general advice from outside these sources.
10
+ # Response Format:
11
+ 1. Executive Summary: A 2-3 sentence overview answering the core query.
12
+ 2. Guidelines & Hints: A markdown list of specific "answers/guidelines/hints" found in the source material.
13
+ """
14
+
15
def build_prompt(query: str, context: str = "") -> str:
    """Assemble the full prompt text sent to the model.

    The prompt consists of three turns delimited by ``<|im_start|>`` /
    ``<|im_end|>`` markers: a system turn carrying ``SYSTEM_PROMPT``, a user
    turn, and an opening assistant turn left for the model to complete.

    Args:
        query: The user's question.
        context: Optional retrieved text. When non-empty, the user turn is
            rendered as "Context: ... Question: ..."; otherwise the user turn
            is just the question.

    Returns:
        The assembled prompt string.
    """
    if context:
        user_turn = f"Context:\n{context}\n\nQuestion: {query}"
    else:
        user_turn = query

    segments = [
        f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n",
        f"<|im_start|>user\n{user_turn}<|im_end|>\n",
        "<|im_start|>assistant\n",
    ]
    return "".join(segments)
25
+
26
+
27
def generate_answer(llm, prompt: str, max_tokens=1200, temperature=0.65,
                    top_p=0.9, stop=None):
    """Run one completion and return the generated text, stripped.

    Args:
        llm: Callable invoked as ``llm(prompt, **sampling_kwargs)`` that
            returns a mapping shaped like
            ``{"choices": [{"text": ...}, ...]}``.
        prompt: Full prompt text to complete.
        max_tokens: Upper bound on generated tokens passed to ``llm``.
        temperature: Sampling temperature passed to ``llm``.
        top_p: Nucleus-sampling value passed to ``llm``.
        stop: Iterable of stop strings. Defaults to the turn markers
            ``<|im_end|>`` and ``<|im_start|>`` used by ``build_prompt``.

    Returns:
        The ``text`` of the first choice with surrounding whitespace removed.

    Raises:
        KeyError, IndexError: If the result has no ``choices[0]["text"]``.
    """
    # Default is built per call so callers can never mutate a shared list.
    if stop is None:
        stop = ["<|im_end|>", "<|im_start|>"]

    output = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stop=list(stop),
        echo=False,
    )
    return output["choices"][0]["text"].strip()
37
+
38
+
39
def save_result(query: str, answer: str, output_file="answer.md"):
    """Write the question and answer to a Markdown file and echo the answer.

    The file holds a title, a timestamp section (local time formatted as
    ``YYYY-MM-DD HH:MM:SS``), the question and the answer. Any existing file
    at ``output_file`` is overwritten.

    Args:
        query: The question that was asked.
        answer: The generated answer text.
        output_file: Destination path. Missing parent directories are
            created so a finished answer is not lost to a bad path.

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    import os  # local import: only needed for the parent-directory handling

    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    markdown = f"""# Q&A Result

## Timestamp
{now}

## Question
{query}

## Answer
{answer}
"""
    # A bare filename has no directory part; only create one when present.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(markdown)

    print(f"✅ Saved to: {output_file}")
    print("="*80)
    print(answer)
    print("="*80)
61
+
62
def parse_args(argv=None):
    """Parse the command-line options shared by the Q&A scripts.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, which is what existing callers
            get.

    Returns:
        An ``argparse.Namespace`` with ``prompt`` (``None`` when not given)
        and ``output`` (defaults to ``"answer.md"``).
    """
    parser = argparse.ArgumentParser(
        description="Ask a question and save the answer as Markdown."
    )
    parser.add_argument("--prompt", type=str, help="Question to ask")
    parser.add_argument(
        "--output",
        type=str,
        default="answer.md",
        help="Markdown file to write the answer to (default: answer.md)",
    )
    return parser.parse_args(argv)
qa_markdown_chroma_externalized.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from qa_common import parse_args, build_prompt, generate_answer, save_result
3
+ # REVISED: Imported from the dedicated langchain_chroma package
4
+ from langchain_chroma import Chroma
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from llama_cpp import Llama
7
+
8
+ # ====================== CHROMA SPECIFIC ======================
9
# Paths and heavyweight objects for the Chroma variant; everything below is
# created at import time and used by get_context() and the __main__ block.
VECTORSTORE_PATH = "chroma_db"
MODEL_PATH = "articles-Q4_K_M.gguf"

# Embedding model, configured with normalize_embeddings=True.
print("Loading embedding model...")
embeddings = HuggingFaceEmbeddings(
    encode_kwargs=dict(normalize_embeddings=True),
    model_name="BAAI/bge-small-en-v1.5",
)

# Vector store opened from the on-disk directory.
print("Loading Chroma vector store...")
vectorstore = Chroma(embedding_function=embeddings, persist_directory=VECTORSTORE_PATH)

# Retriever configured with k=5.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=5))

# GGUF model loaded through llama_cpp.
print("Loading LLM...")
llm = Llama(model_path=MODEL_PATH, n_ctx=65000, n_threads=8, verbose=False)
33
+
34
+
35
def get_context(query: str) -> str:
    """Fetch documents for *query* from the Chroma retriever and format them.

    Each document is rendered as ``[Article: <title>] <content>``, where the
    title comes from the ``article_title`` metadata key (``'N/A'`` when the
    key is absent). Entries are separated by a blank line.
    """
    entries = []
    for hit in retriever.invoke(query):
        title = hit.metadata.get('article_title', 'N/A')
        entries.append(f"[Article: {title}] {hit.page_content}")
    return "\n\n".join(entries)
43
+
44
+
45
if __name__ == "__main__":
    # Take the question from --prompt, or ask interactively when it is absent.
    args = parse_args()
    query = args.prompt or input("\nQuestion: ")

    print("Retrieving context and generating answer...\n")

    # Retrieve context, build the prompt, generate, then save and echo.
    answer = generate_answer(llm, build_prompt(query, get_context(query)))
    save_result(query, answer, args.output)
qa_markdown_faiss_hnsw_externalized.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from qa_common import parse_args, build_prompt, generate_answer, save_result
3
+ import faiss
4
+ import pickle
5
+ from sentence_transformers import SentenceTransformer
6
+ from llama_cpp import Llama
7
+
8
+ # ====================== FAISS HNSW SPECIFIC ======================
9
# Paths and heavyweight objects for the FAISS HNSW variant; everything below
# is created at import time and used by get_context() and the __main__ block.
INDEX_PATH = "faiss_hnsw/vector_search.index"
METADATA_PATH = "faiss_hnsw/metadata.pkl"
MODEL_PATH = "articles-Q4_K_M.gguf"

# Sentence-transformers model used to embed queries.
print("Loading embedding model...")
embed_model = SentenceTransformer("BAAI/bge-small-en-v1.5")

# Index file read from disk.
print("Loading FAISS HNSW index...")
index = faiss.read_index(INDEX_PATH)

# NOTE(review): pickle.load executes code embedded in the file; only use a
# metadata.pkl obtained from a trusted source.
print("Loading metadata...")
with open(METADATA_PATH, "rb") as f:
    metadata = pickle.load(f)

# GGUF model loaded through llama_cpp.
print("Loading LLM...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=25000,
    n_threads=8,
    verbose=False,
)
25
+
26
+
27
def get_context(query: str, k=5) -> str:
    """Retrieve up to *k* article chunks for *query* from the FAISS index.

    The query is embedded (normalized, cast to float32), searched against
    ``index``, and each returned id is looked up positionally in
    ``metadata``. Every hit is rendered as
    ``[Article: <article_title>] `` followed by a newline and
    ``<article_content>``; hits are separated by a blank line.

    Args:
        query: The user's question.
        k: Number of neighbours requested from the index.

    Returns:
        The formatted context, or an empty string when the search yields no
        valid ids.
    """
    query_vec = embed_model.encode([query], normalize_embeddings=True).astype('float32')
    _, indices = index.search(query_vec, k)

    chunks = []
    for idx in indices[0]:
        # FAISS fills missing neighbours with -1; metadata.iloc[-1] would
        # silently return the last row as if it were a real match.
        if idx < 0:
            continue
        row = metadata.iloc[idx]
        chunks.append(f"[Article: {row['article_title']}] \n{row['article_content']}")
    return "\n\n".join(chunks)
37
+
38
+
39
if __name__ == "__main__":
    # Take the question from --prompt, or ask interactively when it is absent.
    args = parse_args()
    query = args.prompt or input("\nQuestion: ")

    print("Retrieving context and generating answer...\n")

    # Retrieve context (k=5), build the prompt, generate, then save and echo.
    retrieved = get_context(query, k=5)
    answer = generate_answer(llm, build_prompt(query, retrieved))
    save_result(query, answer, args.output)
qa_markdown_fast.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from qa_common import parse_args, build_prompt, generate_answer, save_result
3
+ from llama_cpp import Llama
4
+
5
+ # ====================== CONFIG ======================
6
# Model file for the retrieval-free variant; the model is loaded at import
# time and used by answer().
MODEL_PATH = "articles-Q4_K_M.gguf"

print("Loading GGUF model...")
llm = Llama(model_path=MODEL_PATH, n_ctx=8192, n_threads=8, verbose=False)
15
+
16
def answer(query: str):
    """Answer *query* with the model alone, without retrieved context.

    Builds the prompt with an empty context and generates with a
    1100-token cap.
    """
    bare_prompt = build_prompt(query, context="")
    return generate_answer(llm, bare_prompt, max_tokens=1100)
19
+
20
+
21
if __name__ == "__main__":
    # Take the question from --prompt, or ask interactively when it is absent.
    args = parse_args()
    query = args.prompt or input("\nQuestion: ")

    print("Generating answer using fine-tuned model (Fast Mode)...\n")

    # Generate, then save and echo the answer.
    save_result(query, answer(query), args.output)
qa_markdown_qdrant_externalized.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from qa_common import parse_args, build_prompt, generate_answer, save_result
3
+ from langchain_qdrant import QdrantVectorStore
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
+ from llama_cpp import Llama
6
+ # FIX 1: Import the native client to manage its lifecycle
7
+ from qdrant_client import QdrantClient
8
+
9
+ # ====================== QDRANT SPECIFIC ======================
10
# Paths and heavyweight objects for the Qdrant variant; everything below is
# created at import time and used by get_context() and the __main__ block.
QDRANT_PATH = "qdrant_db"
COLLECTION_NAME = "articles"
MODEL_PATH = "articles-Q4_K_M.gguf"

# Embedding model, configured with normalize_embeddings=True.
print("Loading embedding model...")
embeddings = HuggingFaceEmbeddings(
    encode_kwargs=dict(normalize_embeddings=True),
    model_name="BAAI/bge-small-en-v1.5",
)

print("Loading Qdrant vector store...")
# The client is created explicitly (not by the vector store) so the script
# can manage its lifecycle and close it itself.
client = QdrantClient(path=QDRANT_PATH)

# The vector store reuses that client.
vectorstore = QdrantVectorStore(client=client, collection_name=COLLECTION_NAME, embedding=embeddings)

# Retriever configured with k=5.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=5))

# GGUF model loaded through llama_cpp.
print("Loading LLM...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=25000,
    n_threads=8,
    verbose=False,
)
35
+
36
+
37
def get_context(query: str) -> str:
    """Fetch documents for *query* from the Qdrant retriever and format them.

    Each document is rendered as ``[Article: <title>] <content>``, where the
    title comes from the ``article_title`` metadata key (``'N/A'`` when the
    key is absent). Entries are separated by a blank line.
    """
    def render(doc):
        # One formatted entry per retrieved document.
        return f"[Article: {doc.metadata.get('article_title', 'N/A')}] {doc.page_content}"

    return "\n\n".join(map(render, retriever.invoke(query)))
44
+
45
+
46
if __name__ == "__main__":
    # Everything runs inside try/finally so the Qdrant client is closed even
    # when argument parsing, retrieval, generation or saving raises.
    try:
        args = parse_args()
        query = args.prompt if args.prompt else input("\nQuestion: ")

        print("Retrieving context and generating answer...\n")

        context = get_context(query)
        prompt = build_prompt(query, context)
        answer = generate_answer(llm, prompt)

        save_result(query, answer, args.output)
    finally:
        # Close explicitly here rather than leaving it to interpreter
        # shutdown, when Python may no longer be fully intact.
        print("Closing vector store connection...")
        client.close()