Subha95 committed on
Commit
ba3e7c5
·
verified ·
1 Parent(s): 1ae31f5

Update chatbot_rag.py

Browse files
Files changed (1) hide show
  1. chatbot_rag.py +23 -21
chatbot_rag.py CHANGED
@@ -67,26 +67,27 @@ def build_qa():
67
  # 4. Retriever
68
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
69
 
70
- # 5. Prompt
71
  prompt = PromptTemplate(
72
- input_variables=["context", "question"],
73
- template="""
74
- Use the following context to answer the question at the end.
75
- If you don't know the answer, just say "I don't know" — do not make up an answer.
76
-
77
- Context:
78
- {context}
79
-
80
- Question: {question}
81
- Answer (one short sentence):
82
- """,
 
 
83
  )
84
 
 
85
  # 6. Helper functions
86
  def format_docs(docs):
87
  return "\n".join(doc.page_content for doc in docs)
88
 
89
-
90
  def hf_to_str(x):
91
  """Convert Hugging Face pipeline output to clean plain text."""
92
  if isinstance(x, list) and "generated_text" in x[0]:
@@ -94,28 +95,29 @@ def build_qa():
94
  else:
95
  text = str(x)
96
 
97
- # 1. Remove code-like artifacts
 
98
  text = re.sub(r"def\s+\w+\(.*?\):.*", "", text, flags=re.DOTALL)
99
- text = re.sub(r"(print\(.*?\))", "", text)
100
  text = re.sub(r"text\s*\+=.*", "", text)
101
 
102
- # 2. Remove markdown/code fences
103
- text = text.replace("```", "").replace("'''", "").replace('"""', "")
104
 
105
- # 3. Normalize whitespace & line breaks
106
  text = re.sub(r"\s+", " ", text)
107
 
108
- # 4. Remove repeated sentences (simple dedupe)
109
  sentences = []
110
  for s in re.split(r"(?<=[.!?])\s+", text):
111
- if s not in sentences:
112
  sentences.append(s)
113
  text = " ".join(sentences)
114
 
115
- # 5. Trim
116
  return text.strip()
117
 
118
 
 
119
  # 7. RAG chain
120
  rag_chain = (
121
  {
 
67
  # 4. Retriever
68
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
69
 
 
70
  prompt = PromptTemplate(
71
+ input_variables=["context", "question"],
72
+ template="""
73
+ Use the following context to answer the question.
74
+ - Answer in plain natural language.
75
+ - Do not include code, imports, functions, or explanations of how to implement code.
76
+ - If you don't know, just say "I don't know."
77
+
78
+ Context:
79
+ {context}
80
+
81
+ Question: {question}
82
+ Answer (in short ):
83
+ """,
84
  )
85
 
86
+
87
  # 6. Helper functions
88
  def format_docs(docs):
89
  return "\n".join(doc.page_content for doc in docs)
90
 
 
91
  def hf_to_str(x):
92
  """Convert Hugging Face pipeline output to clean plain text."""
93
  if isinstance(x, list) and "generated_text" in x[0]:
 
95
  else:
96
  text = str(x)
97
 
98
+ # Remove code-like patterns (imports, defs, classes, etc.)
99
+ text = re.sub(r"(from\s+\w+\s+import\s+.*|import\s+\w+.*)", "", text)
100
  text = re.sub(r"def\s+\w+\(.*?\):.*", "", text, flags=re.DOTALL)
101
+ text = re.sub(r"class\s+\w+.*?:.*", "", text, flags=re.DOTALL)
102
  text = re.sub(r"text\s*\+=.*", "", text)
103
 
104
+ # Remove markdown/code fences & quotes
105
+ text = text.replace("```", "").replace("'''", "").replace('"""', "").replace("\\n", " ")
106
 
107
+ # Normalize whitespace
108
  text = re.sub(r"\s+", " ", text)
109
 
110
+ # Deduplicate repeated sentences
111
  sentences = []
112
  for s in re.split(r"(?<=[.!?])\s+", text):
113
+ if s and s not in sentences:
114
  sentences.append(s)
115
  text = " ".join(sentences)
116
 
 
117
  return text.strip()
118
 
119
 
120
+
121
  # 7. RAG chain
122
  rag_chain = (
123
  {