Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,104 +8,110 @@ from langchain_community.vectorstores import FAISS
|
|
| 8 |
from transformers import pipeline
|
| 9 |
|
| 10 |
# -------------------------
|
| 11 |
-
#
|
| 12 |
# -------------------------
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
url = f"https://duckduckgo.com/html/?q={query}"
|
| 15 |
headers = {"User-Agent": "Mozilla/5.0"}
|
| 16 |
|
| 17 |
-
|
| 18 |
-
soup = BeautifulSoup(
|
| 19 |
|
| 20 |
links = []
|
| 21 |
for a in soup.find_all("a", {"class": "result__a"}, href=True):
|
| 22 |
links.append(a["href"])
|
| 23 |
-
if len(links) >=
|
| 24 |
break
|
| 25 |
-
|
| 26 |
return links
|
| 27 |
|
| 28 |
# -------------------------
|
| 29 |
-
# Extract
|
| 30 |
# -------------------------
|
| 31 |
-
def
|
| 32 |
try:
|
| 33 |
headers = {"User-Agent": "Mozilla/5.0"}
|
| 34 |
-
|
| 35 |
-
soup = BeautifulSoup(
|
| 36 |
-
|
| 37 |
-
texts = soup.find_all(["p", "h1", "h2", "h3"])
|
| 38 |
-
content = " ".join([t.get_text().strip() for t in texts if t.get_text().strip()])
|
| 39 |
|
| 40 |
-
|
|
|
|
| 41 |
except:
|
| 42 |
return ""
|
| 43 |
|
| 44 |
# -------------------------
|
| 45 |
-
# Build RAG
|
| 46 |
# -------------------------
|
| 47 |
-
def
|
|
|
|
| 48 |
urls = search_web(query)
|
| 49 |
|
| 50 |
all_text = ""
|
| 51 |
-
for
|
| 52 |
-
|
| 53 |
-
all_text += content + " "
|
| 54 |
|
| 55 |
if len(all_text.strip()) == 0:
|
| 56 |
return None
|
| 57 |
|
| 58 |
splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=50)
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
embeddings = HuggingFaceEmbeddings(
|
| 62 |
-
model_name="sentence-transformers/all-MiniLM-L6-v2"
|
| 63 |
-
)
|
| 64 |
|
| 65 |
-
|
| 66 |
-
return vectorstore
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
# -------------------------
|
| 71 |
-
generator = pipeline(
|
| 72 |
-
"text-generation",
|
| 73 |
-
model="microsoft/phi-2",
|
| 74 |
-
max_new_tokens=150,
|
| 75 |
-
temperature=0.2,
|
| 76 |
-
do_sample=False
|
| 77 |
-
)
|
| 78 |
|
| 79 |
# -------------------------
|
| 80 |
# Chat Function
|
| 81 |
# -------------------------
|
| 82 |
-
def
|
| 83 |
|
| 84 |
-
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
context = " ".join([doc.page_content for doc in docs])
|
| 91 |
|
| 92 |
prompt = f"""
|
| 93 |
-
You are a professional
|
| 94 |
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
Context:
|
| 99 |
{context}
|
| 100 |
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
| 102 |
{message}
|
| 103 |
|
| 104 |
-
|
| 105 |
"""
|
| 106 |
|
| 107 |
result = generator(prompt)[0]["generated_text"]
|
| 108 |
-
|
|
|
|
|
|
|
| 109 |
|
| 110 |
return answer
|
| 111 |
|
|
@@ -113,8 +119,8 @@ Answer:
|
|
| 113 |
# UI
|
| 114 |
# -------------------------
|
| 115 |
demo = gr.ChatInterface(
|
| 116 |
-
fn=
|
| 117 |
-
title="
|
| 118 |
)
|
| 119 |
|
| 120 |
if __name__ == "__main__":
|
|
|
|
| 8 |
from transformers import pipeline
|
| 9 |
|
| 10 |
# -------------------------
|
| 11 |
+
# Load Models (HF only)
|
| 12 |
# -------------------------
|
| 13 |
+
# Generation settings are grouped in one mapping so they can be tuned in a
# single place; they are forwarded unchanged to the text-generation pipeline.
_GENERATION_KWARGS = {
    "model": "microsoft/phi-2",
    "max_new_tokens": 150,
    "temperature": 0.3,
    "do_sample": True,
}
generator = pipeline("text-generation", **_GENERATION_KWARGS)

# Embedding model shared by every request; get_context() uses it to build the
# FAISS index over the scraped text chunks.
_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)
|
| 24 |
+
|
| 25 |
+
# -------------------------
|
| 26 |
+
# Search (DuckDuckGo)
|
| 27 |
+
# -------------------------
|
| 28 |
+
def search_web(query, max_results=3):
    """Return up to ``max_results`` result URLs from DuckDuckGo's HTML search.

    Args:
        query: Free-text search query. It is passed through ``params`` so
            that ``requests`` percent-encodes it.
        max_results: Maximum number of links to return (defaults to 3, the
            limit that used to be hard-coded).

    Returns:
        A list of URL strings. The list is empty when the search request
        fails or the page contains no ``result__a`` anchors.
    """
    from urllib.parse import parse_qs, urlparse

    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # Building the URL with an f-string left characters such as '&', '#'
        # or spaces unescaped, which silently changed the query.
        res = requests.get(
            "https://duckduckgo.com/html/",
            params={"q": query},
            headers=headers,
            timeout=5,  # same limit extract_text() uses; avoids hanging the chat
        )
        res.raise_for_status()
    except requests.RequestException:
        # Best-effort, like extract_text(): a failed search means "no links".
        return []

    soup = BeautifulSoup(res.text, "html.parser")

    links = []
    for a in soup.find_all("a", {"class": "result__a"}, href=True):
        if len(links) >= max_results:
            break
        href = a["href"]
        # NOTE(review): DuckDuckGo's HTML page appears to wrap results in
        # redirect links of the form "//duckduckgo.com/l/?uddg=<target>".
        # Unwrap them when present so callers get a directly fetchable URL;
        # hrefs without a "uddg" parameter are passed through unchanged.
        target = parse_qs(urlparse(href).query).get("uddg")
        if target:
            href = target[0]
        elif href.startswith("//"):
            # Protocol-relative link: requests cannot fetch it without a scheme.
            href = "https:" + href
        links.append(href)
    return links
|
| 41 |
|
| 42 |
# -------------------------
|
| 43 |
+
# Extract Text
|
| 44 |
# -------------------------
|
| 45 |
+
def extract_text(url):
    """Fetch ``url`` and return the text of its ``<p>``, ``<h1>`` and ``<h2>`` tags.

    Args:
        url: Address of the page to download.

    Returns:
        The non-empty tag texts joined by single spaces, or ``""`` when the
        page cannot be fetched or parsed (this function is best-effort and
        never raises for ordinary failures).
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        res = requests.get(url, headers=headers, timeout=5)
        # Treat HTTP errors as "no content" so 4xx/5xx error pages are not
        # fed into the retrieval context.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")

        # Strip each tag's text once, then drop the empty ones.
        pieces = (t.get_text().strip() for t in soup.find_all(["p", "h1", "h2"]))
        return " ".join(piece for piece in pieces if piece)
    except Exception:
        # Deliberately broad to keep scraping best-effort, but no longer a bare
        # ``except:`` which also swallowed KeyboardInterrupt and SystemExit.
        return ""
|
| 55 |
|
| 56 |
# -------------------------
|
| 57 |
+
# Build RAG Context
|
| 58 |
# -------------------------
|
| 59 |
+
def get_context(query):
    """Build retrieval context for ``query`` from live web search results.

    The function searches the web, scrapes the returned pages, splits the
    combined text into overlapping chunks, indexes the chunks in an in-memory
    FAISS store and returns the three chunks most similar to the query.

    Args:
        query: The user's question; used both for the web search and for the
            similarity search over the scraped chunks.

    Returns:
        The selected chunks joined by spaces, or ``None`` when no text could
        be scraped.
    """
    urls = search_web(query)

    # join() instead of repeated ``+=`` avoids quadratic string building.
    all_text = " ".join(extract_text(u) for u in urls).strip()
    if not all_text:
        return None

    # The scraped text is joined with single spaces, so the splitter's default
    # "\n\n" separator never occurs in it and everything would end up in one
    # oversized chunk. Splitting on spaces makes chunk_size/overlap effective.
    splitter = CharacterTextSplitter(separator=" ", chunk_size=300, chunk_overlap=50)
    chunks = splitter.split_text(all_text)
    if not chunks:
        # Building a FAISS index from an empty list would fail.
        return None

    vectordb = FAISS.from_texts(chunks, embeddings)

    docs = vectordb.similarity_search(query, k=3)
    return " ".join(d.page_content for d in docs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
# -------------------------
|
| 79 |
# Chat Function
|
| 80 |
# -------------------------
|
| 81 |
+
def _format_history(history, max_turns=3):
    """Render the most recent conversation turns as ``User:``/``Bot:`` lines."""
    recent = history or []
    lines = []
    if recent and isinstance(recent[0], dict):
        # "messages"-style history: one dict per message, so a turn is two
        # entries. Unpacking such dicts as (user, bot) would yield their keys.
        for msg in recent[-2 * max_turns:]:
            speaker = "User" if msg.get("role") == "user" else "Bot"
            lines.append(f"{speaker}: {msg.get('content')}\n")
    else:
        # Pair-style history: one (user, bot) pair per turn.
        for user, bot in recent[-max_turns:]:
            lines.append(f"User: {user}\nBot: {bot}\n")
    return "".join(lines)


def chat(message, history):
    """Answer ``message`` using web-retrieved context and recent chat history.

    Args:
        message: The latest user message.
        history: Previous turns as supplied by the chat UI; either a list of
            ``(user, bot)`` pairs or a list of ``{"role", "content"}`` dicts.
            ``None`` or an empty list means no prior conversation.

    Returns:
        The first line of the model's reply as a string.
    """
    context = get_context(message)

    # Memory (last 3 chats)
    history_text = _format_history(history)

    # get_context() returns None when nothing was scraped; the literal "None"
    # then appears under "Context:" and the rules below tell the model how to
    # handle it.
    prompt = f"""
You are a professional ChatGPT-like assistant.

Rules:
- Use context if available
- If context is None, answer generally
- Keep answers clear and concise
- Avoid repetition

Context:
{context}

Conversation:
{history_text}

User:
{message}

A:
"""

    # Ask the pipeline for the continuation only. Removing the prompt with
    # str.replace() breaks if the echoed prompt is not byte-identical.
    result = generator(prompt, return_full_text=False)[0]["generated_text"]

    answer = result.strip()
    # Keep only the first line so the model cannot continue the transcript
    # with invented "User:" / "Assistant:" turns.
    answer = answer.split("\n")[0]

    return answer
|
| 117 |
|
|
|
|
| 119 |
# UI
|
| 120 |
# -------------------------
|
| 121 |
# Gradio chat UI: every submitted message is routed through chat().
_APP_TITLE = "HF ChatGPT (RAG + Search + Memory)"
demo = gr.ChatInterface(fn=chat, title=_APP_TITLE)
|
| 125 |
|
| 126 |
if __name__ == "__main__":
|