Update chatbot_rag.py
chatbot_rag.py  +27  -69  CHANGED
@@ -1,16 +1,16 @@
-
 from langchain_community.vectorstores import Chroma
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.llms import HuggingFacePipeline
-from transformers import AutoTokenizer,
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from langchain.prompts import PromptTemplate
 from langchain_core.runnables import RunnablePassthrough
 from langchain_core.output_parsers import StrOutputParser
-import traceback
 import re
 import os
-
+import traceback
 from huggingface_hub import login
+
+
 token = os.getenv("HF_TOKEN")
 print("🔑 HF_TOKEN available?", token is not None)
 if token:
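(Note: the hunk above cuts off at `if token:`, so the body of that branch is not shown. A minimal sketch of the usual gate-on-env-var idiom, with the line under `if token:` assumed rather than taken from the diff:)

import os
from huggingface_hub import login

token = os.getenv("HF_TOKEN")
print("🔑 HF_TOKEN available?", token is not None)
if token:
    login(token=token)  # assumed body; the diff truncates after `if token:`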
@@ -38,49 +38,41 @@ def build_qa():
     )
     print("📚 Docs in DB:", vectorstore._collection.count())
 
-    # 3. Load LLM (Phi-3-mini-4k-instruct)
+    # 3. Load LLM (Phi-3.5-mini-instruct)
     print("🔹 Loading LLM...")
+    model_id = "microsoft/Phi-3.5-mini-instruct"
 
-    model_id = "microsoft/Phi-3-mini-4k-instruct"
-
-    # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-    # Load model
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        device_map="auto",
-        torch_dtype="auto",
-        trust_remote_code=True
+        device_map="auto",
+        torch_dtype="auto",
+        trust_remote_code=True
     )
     model.config.use_cache = False
-
-    # Create pipeline
+
     pipe = pipeline(
         "text-generation",
         model=model,
         tokenizer=tokenizer,
-        max_new_tokens=
-        temperature=0.2,
-        do_sample=False,
-
-        repetition_penalty=1.2,  # 🔁 reduce loops/repeats
+        max_new_tokens=80,       # shorter answers
+        temperature=0.2,         # deterministic
+        do_sample=False,
+        repetition_penalty=1.2,
         eos_token_id=tokenizer.eos_token_id,
         return_full_text=False
     )
-
-
-    # 🔹 Wrap in LangChain LLM
+
     llm = HuggingFacePipeline(pipeline=pipe)
 
     # 4. Retriever
     retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
 
+    # 5. Prompt
     prompt = PromptTemplate(
         input_variables=["context", "question"],
-        template="""
-
-Respond in ONE short factual sentence only.
+        template="""Answer the question using the context below.
+Respond in ONE short factual sentence only.
 If you don't know, say "I don't know."
 
 Context:
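A note on the new generation settings: with `do_sample=False`, `transformers` decodes greedily, so the `temperature=0.2` passed alongside it has no effect (recent versions emit a warning about exactly this); the determinism comes from greedy search, and `repetition_penalty` is what actually curbs the loops the old `# 🔁 reduce loops/repeats` comment worried about. A minimal sketch of the same knobs, using the tiny `sshleifer/tiny-gpt2` checkpoint as a stand-in (an assumption, chosen only so the snippet runs quickly on CPU; the Space itself loads Phi-3.5):

from transformers import pipeline

# Stand-in checkpoint for a quick local test; the Space uses
# microsoft/Phi-3.5-mini-instruct instead.
pipe = pipeline("text-generation", model="sshleifer/tiny-gpt2")

out = pipe(
    "Berlin is",
    max_new_tokens=20,       # hard cap on answer length
    do_sample=False,         # greedy decoding; temperature is ignored here
    repetition_penalty=1.2,  # penalize already-generated tokens to curb loops
    return_full_text=False,  # drop the prompt from the returned text
)
print(out[0]["generated_text"])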
@@ -92,51 +84,21 @@ def build_qa():
 Answer:""",
     )
 
-
-
-
-    # 6. Helper functions
+    # 6. Helper
     def format_docs(docs):
-
-        texts = []
-        for doc in docs:
-            if doc.page_content and isinstance(doc.page_content, str):
-                texts.append(doc.page_content.strip())
+        texts = [doc.page_content.strip() for doc in docs if doc.page_content]
         return "\n".join(texts)
 
-
-
     def hf_to_str(x):
-        """Convert Hugging Face pipeline output to clean plain text."""
         if isinstance(x, list) and "generated_text" in x[0]:
             text = x[0]["generated_text"]
         else:
             text = str(x)
-
-        #
-
-
-
-        text = re.sub(r"text\s*\+=.*", "", text)
-
-        # Remove markdown/code fences & quotes
-        text = text.replace("```", "").replace("'''", "").replace('"""', "").replace("\\n", " ")
-
-        # Normalize whitespace
-        text = re.sub(r"\s+", " ", text)
-
-        # Deduplicate repeated sentences
-        sentences = []
-        for s in re.split(r"(?<=[.!?])\s+", text):
-            if s and s not in sentences:
-                sentences.append(s)
-        text = " ".join(sentences)
-
-        return text.strip()
-
-
-
-    # 7. RAG chain
+        text = re.sub(r"\s+", " ", text).strip()
+        # ✅ Only keep first sentence
+        return re.split(r"(?<=[.!?])\s+", text)[0]
+
+    # 7. Chain
     rag_chain = (
         {
             "context": retriever | format_docs,
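The rewritten `hf_to_str` drops the old multi-step scrubbing (code-fence stripping, sentence deduplication) in favor of one rule: collapse whitespace, then keep only the first sentence. A quick standalone check of the split it relies on:

import re

def first_sentence(text: str) -> str:
    # Same tail logic as the new hf_to_str: normalize whitespace,
    # then split after ., ! or ? and keep the first piece.
    text = re.sub(r"\s+", " ", text).strip()
    return re.split(r"(?<=[.!?])\s+", text)[0]

print(first_sentence("Paris is the capital of France. It is also the largest city."))
# -> Paris is the capital of France.

One caveat worth knowing: the lookbehind split also fires after abbreviations ("e.g. x" would be cut at "e.g."), a trade-off this commit accepts in exchange for short answers.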
@@ -148,7 +110,6 @@ def build_qa():
         | StrOutputParser()
     )
 
-
     print("✅ QA pipeline ready.")
     return rag_chain
 
@@ -165,14 +126,11 @@ except Exception as e:
 
 
 def get_answer(query: str) -> str:
-    """
-    Run a query against the QA pipeline and return the answer text.
-    """
+    """Run a query against the QA pipeline and return the answer text."""
     if qa_pipeline is None:
         return "⚠️ QA pipeline not initialized."
-
     try:
-        result = qa_pipeline.invoke(query)
+        result = qa_pipeline.invoke(query)
         return result
     except Exception as e:
         return f"❌ QA run failed: {e}"
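For reference, the module's public surface is unchanged by this commit: the chain appears to be built once at import time (the last hunk's `except Exception as e:` context suggests a module-level try/except around `build_qa()`), and `get_answer()` wraps `qa_pipeline.invoke()`. A minimal sketch of a caller (the import path and the sample question are assumptions, not part of the diff):

# Hypothetical caller, e.g. from the Space's Gradio app.
from chatbot_rag import get_answer

answer = get_answer("What is this knowledge base about?")  # sample query (assumption)
print(answer)  # one short sentence, "I don't know.", or an error string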