Michtiii committed on
Commit
ebc2ded
·
verified ·
1 Parent(s): e35e6b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -51
app.py CHANGED
@@ -8,104 +8,110 @@ from langchain_community.vectorstores import FAISS
8
  from transformers import pipeline
9
 
10
# -------------------------
# Search Function (DuckDuckGo)
# -------------------------
def search_web(query, num_results=3):
    """Search DuckDuckGo's HTML endpoint and return up to *num_results* result URLs.

    Args:
        query: Free-text search query. Passed via ``params`` so requests
            URL-encodes it safely (a raw f-string URL breaks on '&', '#', etc.).
        num_results: Maximum number of result links to collect (default 3).

    Returns:
        List of result URLs; may be empty if the request fails upstream or
        the page layout changes.
    """
    url = "https://duckduckgo.com/html/"
    # Browser-like UA: DuckDuckGo serves a stripped page to the default
    # python-requests user agent.
    headers = {"User-Agent": "Mozilla/5.0"}

    # timeout keeps a hung search request from blocking the caller forever.
    response = requests.get(url, params={"q": query}, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, "html.parser")

    links = []
    for anchor in soup.find_all("a", {"class": "result__a"}, href=True):
        links.append(anchor["href"])
        if len(links) >= num_results:
            break

    return links
27
 
28
# -------------------------
# Extract Content from URL
# -------------------------
def load_website(url):
    """Fetch *url* and return the visible text of its headings and paragraphs.

    Best-effort: returns an empty string on any network or parsing failure,
    so callers can concatenate results without per-URL error handling.
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=5)
        soup = BeautifulSoup(response.text, "html.parser")

        texts = soup.find_all(["p", "h1", "h2", "h3"])
        content = " ".join(t.get_text().strip() for t in texts if t.get_text().strip())

        return content
    # Narrowed from a bare ``except:`` — a bare except also swallows
    # KeyboardInterrupt/SystemExit; best-effort scraping only needs to
    # absorb ordinary runtime errors.
    except Exception:
        return ""
43
 
44
# -------------------------
# Build RAG from Search
# -------------------------
def build_rag(query):
    """Build a FAISS vector store from web pages found for *query*.

    Returns:
        A FAISS vector store over ~300-character chunks of the scraped
        page text, or ``None`` when no usable text could be fetched.
    """
    urls = search_web(query)

    # join() avoids quadratic string concatenation across pages.
    all_text = " ".join(load_website(url) for url in urls)

    if not all_text.strip():
        return None

    splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    texts = splitter.split_text(all_text)

    # NOTE(review): the embedding model is re-instantiated on every call;
    # hoisting it to module level would avoid a per-query model load.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    vectorstore = FAISS.from_texts(texts, embeddings)
    return vectorstore
67
 
68
- # -------------------------
69
- # LLM (HF only)
70
- # -------------------------
71
- generator = pipeline(
72
- "text-generation",
73
- model="microsoft/phi-2",
74
- max_new_tokens=150,
75
- temperature=0.2,
76
- do_sample=False
77
- )
78
 
79
# -------------------------
# Chat Function
# -------------------------
def rag_chat(message, history):
    """Answer *message* by retrieving web context and prompting the LLM.

    ``history`` is accepted to satisfy the gr.ChatInterface signature but
    is not used.
    """
    vectorstore = build_rag(message)

    if vectorstore is None:
        return "I couldn't find relevant information."

    top_docs = vectorstore.similarity_search(message, k=3)
    context = " ".join(doc.page_content for doc in top_docs)

    prompt = f"""
You are a professional AI assistant.

Summarize the answer using the context below.
If unsure, say "I don't know".

Context:
{context}

Question:
{message}

Answer:
"""

    generated = generator(prompt)[0]["generated_text"]

    # Drop the echoed prompt, then keep only the first generated line.
    answer = generated.replace(prompt, "").strip().split("\n")[0]
    return answer
111
 
@@ -113,8 +119,8 @@ Answer:
113
# UI
# -------------------------
# Gradio chat UI wired to the RAG answer function.
demo = gr.ChatInterface(fn=rag_chat, title="Live Search RAG Chatbot")
119
 
120
  if __name__ == "__main__":
 
8
  from transformers import pipeline
9
 
10
# -------------------------
# Load Models (HF only)
# -------------------------
# Text generator: sampled decoding with a low temperature for mostly
# focused but non-repetitive answers.
generator = pipeline(
    "text-generation",
    model="microsoft/phi-2",
    max_new_tokens=150,
    temperature=0.3,
    do_sample=True,
)

# Sentence-embedding model, loaded once and reused for every query's
# FAISS index build.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
24
+
25
# -------------------------
# Search (DuckDuckGo)
# -------------------------
def search_web(query, num_results=3):
    """Search DuckDuckGo's HTML endpoint and return up to *num_results* URLs.

    Args:
        query: Free-text search query. Passed via ``params`` so requests
            URL-encodes it safely (a raw f-string URL breaks on '&', '#', etc.).
        num_results: Maximum number of result links (default 3, matching the
            previously hard-coded cap — existing callers are unaffected).

    Returns:
        List of result URLs; may be empty on failure or layout changes.
    """
    url = "https://duckduckgo.com/html/"
    # Browser-like UA: DuckDuckGo serves a stripped page to the default
    # python-requests user agent.
    headers = {"User-Agent": "Mozilla/5.0"}

    # timeout keeps a hung search from blocking the chat handler forever.
    res = requests.get(url, params={"q": query}, headers=headers, timeout=10)
    soup = BeautifulSoup(res.text, "html.parser")

    links = []
    for a in soup.find_all("a", {"class": "result__a"}, href=True):
        links.append(a["href"])
        if len(links) >= num_results:
            break

    return links
41
 
42
# -------------------------
# Extract Text
# -------------------------
def extract_text(url):
    """Fetch *url* and return the joined text of its <p>/<h1>/<h2> elements.

    Best-effort: returns "" on any network or parsing failure, so callers
    can concatenate results without per-URL error handling.
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        res = requests.get(url, headers=headers, timeout=5)
        soup = BeautifulSoup(res.text, "html.parser")

        texts = soup.find_all(["p", "h1", "h2"])
        return " ".join(t.get_text().strip() for t in texts if t.get_text().strip())
    # Narrowed from a bare ``except:`` — a bare except also swallows
    # KeyboardInterrupt/SystemExit; only ordinary runtime errors should
    # be absorbed here.
    except Exception:
        return ""
55
 
56
# -------------------------
# Build RAG Context
# -------------------------
def get_context(query):
    """Search the web for *query* and return the most relevant text chunks.

    Returns:
        A single space-joined string of the three most similar ~300-char
        chunks, or ``None`` when nothing usable could be scraped.
    """
    urls = search_web(query)

    # join() avoids quadratic string concatenation across pages.
    all_text = " ".join(extract_text(u) for u in urls)

    if not all_text.strip():
        return None

    splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    chunks = splitter.split_text(all_text)

    # NOTE(review): a fresh FAISS index is built per query; fine for a demo,
    # but worth caching if the same query repeats.
    vectordb = FAISS.from_texts(chunks, embeddings)

    docs = vectordb.similarity_search(query, k=3)
    return " ".join(d.page_content for d in docs)
 
 
 
 
 
 
 
 
77
 
78
# -------------------------
# Chat Function
# -------------------------
def chat(message, history):
    """Gradio chat handler: retrieve web context, fold in recent history,
    prompt the LLM, and return the first line of its answer.

    ``history`` is assumed to be gr.ChatInterface's list of
    (user, bot) message pairs.
    """
    context = get_context(message)

    # Memory (last 3 chats)
    turns = [f"User: {u}\nBot: {b}\n" for u, b in history[-3:]]
    history_text = "".join(turns)

    prompt = f"""
You are a professional ChatGPT-like assistant.

Rules:
- Use context if available
- If context is None, answer generally
- Keep answers clear and concise
- Avoid repetition

Context:
{context}

Conversation:
{history_text}

User:
{message}

Assistant:
"""

    raw = generator(prompt)[0]["generated_text"]

    # Drop the echoed prompt, then keep only the first generated line.
    without_prompt = raw.replace(prompt, "").strip()
    return without_prompt.split("\n")[0]
117
 
 
119
# UI
# -------------------------
# Gradio chat UI wired to the RAG chat handler.
demo = gr.ChatInterface(
    fn=chat,
    title="HF ChatGPT (RAG + Search + Memory)",
)
125
 
126
  if __name__ == "__main__":