Rag_chatbot_pwc

Sleeping

App Files Files Community

Michtiii committed 20 days ago

Commit

ebc2ded

verified ·

1 Parent(s): e35e6b9

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -51

app.py CHANGED Viewed

@@ -8,104 +8,110 @@ from langchain_community.vectorstores import FAISS
 from transformers import pipeline
 # -------------------------
-# Search Function (DuckDuckGo)
 # -------------------------
-def search_web(query, num_results=3):
     url = f"https://duckduckgo.com/html/?q={query}"
     headers = {"User-Agent": "Mozilla/5.0"}
-    response = requests.get(url, headers=headers)
-    soup = BeautifulSoup(response.text, "html.parser")
     links = []
     for a in soup.find_all("a", {"class": "result__a"}, href=True):
         links.append(a["href"])
-        if len(links) >= num_results:
             break
     return links
 # -------------------------
-# Extract Content from URL
 # -------------------------
-def load_website(url):
     try:
         headers = {"User-Agent": "Mozilla/5.0"}
-        response = requests.get(url, headers=headers, timeout=5)
-        soup = BeautifulSoup(response.text, "html.parser")
-        texts = soup.find_all(["p", "h1", "h2", "h3"])
-        content = " ".join([t.get_text().strip() for t in texts if t.get_text().strip()])
-        return content
     except:
         return ""
 # -------------------------
-# Build RAG from Search
 # -------------------------
-def build_rag(query):
     urls = search_web(query)
     all_text = ""
-    for url in urls:
-        content = load_website(url)
-        all_text += content + " "
     if len(all_text.strip()) == 0:
         return None
     splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=50)
-    texts = splitter.split_text(all_text)
-    embeddings = HuggingFaceEmbeddings(
-        model_name="sentence-transformers/all-MiniLM-L6-v2"
-    )
-    vectorstore = FAISS.from_texts(texts, embeddings)
-    return vectorstore
-# -------------------------
-# LLM (HF only)
-# -------------------------
-generator = pipeline(
-    "text-generation",
-    model="microsoft/phi-2",
-    max_new_tokens=150,
-    temperature=0.2,
-    do_sample=False
-)
 # -------------------------
 # Chat Function
 # -------------------------
-def rag_chat(message, history):
-    vectorstore = build_rag(message)
-    if vectorstore is None:
-        return "I couldn't find relevant information."
-    docs = vectorstore.similarity_search(message, k=3)
-    context = " ".join([doc.page_content for doc in docs])
     prompt = f"""
-You are a professional AI assistant.
-Summarize the answer using the context below.
-If unsure, say "I don't know".
 Context:
 {context}
-Question:
 {message}
-Answer:
 """
     result = generator(prompt)[0]["generated_text"]
-    answer = result.replace(prompt, "").strip().split("\n")[0]
     return answer
@@ -113,8 +119,8 @@ Answer:
 # UI
 # -------------------------
 demo = gr.ChatInterface(
-    fn=rag_chat,
-    title="Live Search RAG Chatbot"
 )
 if __name__ == "__main__":

 from transformers import pipeline
 # -------------------------
+# Load Models (HF only)
 # -------------------------
# Text-generation model, loaded once at import time and reused for every chat turn.
generator = pipeline(
    task="text-generation",
    model="microsoft/phi-2",
    do_sample=True,
    temperature=0.3,
    max_new_tokens=150,
)
# Sentence-embedding model shared by every FAISS index built in get_context.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+# -------------------------
+# Search (DuckDuckGo)
+# -------------------------
def search_web(query, num_results=3):
    """Return up to ``num_results`` result links for ``query`` from DuckDuckGo.

    The query is sent as a URL parameter so that spaces and reserved
    characters (``&``, ``#``, ``+`` ...) are percent-encoded instead of
    corrupting the request URL.

    Args:
        query: Free-text search string.
        num_results: Maximum number of links to return (default 3).

    Returns:
        A list of result URLs, possibly empty. An empty list is also returned
        when the search request itself fails, so the caller can fall back to
        answering without web context instead of crashing.
    """
    # Function-scope import: the file's import header is outside this block.
    from urllib.parse import parse_qs, urlparse

    if num_results <= 0:
        return []

    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        res = requests.get(
            "https://duckduckgo.com/html/",
            params={"q": query},  # requests percent-encodes the value
            headers=headers,
            timeout=5,  # never let a stalled search block the chat handler
        )
        res.raise_for_status()
    except requests.RequestException:
        return []

    soup = BeautifulSoup(res.text, "html.parser")
    links = []
    for a in soup.find_all("a", {"class": "result__a"}, href=True):
        href = a["href"]
        # NOTE(review): DuckDuckGo's HTML results appear to wrap targets in a
        # scheme-relative redirect such as //duckduckgo.com/l/?uddg=<target>;
        # verify against live markup. Plain absolute links pass through as-is.
        if href.startswith("//"):
            href = "https:" + href
        parsed = urlparse(href)
        if parsed.netloc.endswith("duckduckgo.com") and parsed.path.rstrip("/") == "/l":
            target = parse_qs(parsed.query).get("uddg")
            if target:
                href = target[0]  # parse_qs has already percent-decoded it
        links.append(href)
        if len(links) >= num_results:
            break
    return links
 # -------------------------
+# Extract Text
 # -------------------------
def extract_text(url):
    """Fetch ``url`` and return the text of its paragraphs and top headings.

    Args:
        url: Address of the page to download.

    Returns:
        The stripped text of every non-empty ``<p>``, ``<h1>`` and ``<h2>``
        element, joined by single spaces. Returns ``""`` when the page cannot
        be fetched (including HTTP error statuses) or parsed.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        res = requests.get(url, headers=headers, timeout=5)
        # Skip 4xx/5xx responses so error pages are not indexed as content.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")
        pieces = (t.get_text().strip() for t in soup.find_all(["p", "h1", "h2"]))
        return " ".join(piece for piece in pieces if piece)
    except Exception:
        # Deliberate best-effort boundary: one bad page must not abort the
        # whole lookup. Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        return ""
 # -------------------------
+# Build RAG Context
 # -------------------------
def get_context(query):
    """Build retrieval context for ``query`` from live web search results.

    Searches the web, downloads the result pages, splits their text into
    overlapping chunks, indexes the chunks in an in-memory FAISS store and
    returns the three chunks most similar to the query.

    Args:
        query: The user's question; used both as the search string and as the
            similarity-search probe.

    Returns:
        The top matching chunks joined by single spaces, or ``None`` when no
        page text could be collected.
    """
    pages = (extract_text(u) for u in search_web(query))
    all_text = " ".join(page for page in pages if page).strip()
    if not all_text:
        return None

    # extract_text joins elements with single spaces, so the text never
    # contains the splitter's default "\n\n" separator; splitting on " " is
    # what makes chunk_size/chunk_overlap actually take effect.
    splitter = CharacterTextSplitter(separator=" ", chunk_size=300, chunk_overlap=50)
    chunks = splitter.split_text(all_text)
    if not chunks:
        return None

    vectordb = FAISS.from_texts(chunks, embeddings)
    docs = vectordb.similarity_search(query, k=3)
    return " ".join(d.page_content for d in docs)
 # -------------------------
 # Chat Function
 # -------------------------
def _format_history(history, max_turns=3):
    """Render the last ``max_turns`` exchanges as ``User:``/``Bot:`` lines."""
    if not history:
        return ""
    # NOTE(review): Gradio appears to pass history either as (user, bot) pairs
    # or as {"role": ..., "content": ...} dicts depending on the ChatInterface
    # ``type`` setting -- confirm against the installed version.
    if isinstance(history[0], dict):
        lines = []
        # One exchange is two messages (user + assistant).
        for msg in history[-2 * max_turns:]:
            speaker = "User" if msg.get("role") == "user" else "Bot"
            lines.append(f"{speaker}: {msg.get('content')}\n")
        return "".join(lines)
    return "".join(f"User: {user}\nBot: {bot}\n" for user, bot in history[-max_turns:])


def chat(message, history):
    """Answer ``message`` using web-retrieved context and recent chat history.

    Args:
        message: The user's latest message.
        history: Previous turns as supplied by the chat UI; may be empty or
            ``None``. Only the last three exchanges are included in the prompt.

    Returns:
        The first line of the model's completion, with the prompt removed.
    """
    context = get_context(message)
    # Memory (last 3 chats)
    history_text = _format_history(history)
    prompt = f"""
You are a professional ChatGPT-like assistant.
Rules:
- Use context if available
- If context is None, answer generally
- Keep answers clear and concise
- Avoid repetition
Context:
{context}
Conversation:
{history_text}
User:
{message}
Assistant:
"""
    result = generator(prompt)[0]["generated_text"]
    # The generated text is expected to echo the prompt; remove it and keep
    # only the first line so the model cannot run on into an invented
    # "User:" turn.
    answer = result.replace(prompt, "").strip()
    return answer.split("\n")[0]
 # UI
 # -------------------------
 demo = gr.ChatInterface(
+    fn=chat,
+    title="HF ChatGPT (RAG + Search + Memory)"
 )
 if __name__ == "__main__":