Shreyas094 committed on
Commit
e13157e
·
verified ·
1 Parent(s): e1d6596

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -62
app.py CHANGED
@@ -1,21 +1,8 @@
1
  import gradio as gr
2
  from duckduckgo_search import DDGS
3
- from typing import List, Dict
4
  from huggingface_hub import InferenceClient
5
  import os
6
- from langchain.chains import ConversationalRetrievalChain
7
- from langchain.memory import ConversationBufferMemory
8
- from langchain.llms import HuggingFacePipeline
9
- from langchain.embeddings import HuggingFaceEmbeddings
10
- from langchain.vectorstores import FAISS
11
- from langchain.schema import Document
12
- from transformers import pipeline
13
- from langchain.llms import HuggingFaceHub
14
- from langchain.llms import HuggingFaceHub
15
- from langchain_core.retrievers import BaseRetriever
16
- from pydantic import BaseModel, Field
17
- from typing import List
18
- from typing import List, Dict, Any
19
 
20
  # Environment variables and configurations
21
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
@@ -36,63 +23,92 @@ def get_web_search_results(query: str, max_results: int = 10) -> List[Dict[str,
36
  print(f"An error occurred during web search: {str(e)}")
37
  return [{"error": f"An error occurred during web search: {str(e)}"}]
38
 
39
- class DuckDuckGoRetriever(BaseRetriever, BaseModel):
40
- embeddings: Any = Field(description="The embeddings to use for the retriever")
41
- vectorstore: Any = Field(default_factory=lambda: None, description="The vector store to use for the retriever")
42
-
43
- class Config:
44
- arbitrary_types_allowed = True
45
-
46
- def __init__(self, **data):
47
- super().__init__(**data)
48
- self.vectorstore = FAISS.from_texts(["Initial document"], self.embeddings)
49
-
50
- def get_relevant_documents(self, query: str) -> List[Document]:
51
- search_results = get_web_search_results(query)
52
- docs = [Document(page_content=f"Title: {result['title']}\nContent: {result['body']}",
53
- metadata={"source": result['href']}) for result in search_results]
54
 
55
- # Update the vector store with new documents
56
- self.vectorstore.add_documents(docs)
 
 
57
 
58
- # Perform similarity search to get most relevant documents
59
- return self.vectorstore.similarity_search(query, k=3)
60
-
61
- async def aget_relevant_documents(self, query: str) -> List[Document]:
62
- return self.get_relevant_documents(query)
63
-
64
- def setup_retrieval_chain(model_name):
65
- # Set up the language model using HuggingFaceHub
66
- llm = HuggingFaceHub(
67
- repo_id=model_name,
68
- model_kwargs={"temperature": 0.7, "max_length": 512},
69
- huggingfacehub_api_token=huggingface_token
70
- )
71
-
72
- # Set up the embeddings
73
- embeddings = HuggingFaceEmbeddings()
74
-
75
- # Create the DuckDuckGo retriever
76
- retriever = DuckDuckGoRetriever(embeddings=embeddings)
77
 
78
- # Set up the memory
79
- memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 
 
 
80
 
81
- # Create the conversational chain
82
- qa = ConversationalRetrievalChain.from_llm(
83
- llm=llm,
84
- retriever=retriever,
85
- memory=memory
86
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
- return qa
89
 
90
- def respond(message, chat_history, model, temperature, num_api_calls):
91
- qa_chain = setup_retrieval_chain(model)
92
 
93
- result = qa_chain({"question": message})
 
 
 
 
 
 
 
 
94
 
95
- return result['answer']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  css = """
98
  Your custom CSS here
@@ -145,6 +161,7 @@ demo = gr.ChatInterface(
145
  likeable=True,
146
  layout="bubble",
147
  height=400,
 
148
  )
149
  )
150
 
 
1
  import gradio as gr
2
  from duckduckgo_search import DDGS
3
+ from typing import List, Dict, Tuple
4
  from huggingface_hub import InferenceClient
5
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # Environment variables and configurations
8
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 
23
  print(f"An error occurred during web search: {str(e)}")
24
  return [{"error": f"An error occurred during web search: {str(e)}"}]
25
 
26
def summarize_results(query: str, search_results: List[Dict[str, str]], model: str) -> str:
    """Turn raw DuckDuckGo search results into a news-article-style summary.

    Args:
        query: The (possibly rephrased) user query the results answer.
        search_results: Result dicts with 'title', 'body' and 'href' keys,
            as produced by get_web_search_results().
        model: Currently unused -- the summary is always generated with the
            DDGS chat backend's "llama-3-70b" model.  TODO(review): either
            wire this parameter through or drop it from the signature.

    Returns:
        The generated article, or an error-message string on failure
        (this function never raises).
    """
    # Guard: nothing to summarize -- avoid a pointless network call with
    # an empty context.
    if not search_results:
        return f"No search results available to summarize for the query: {query}"

    try:
        context = "\n\n".join(
            f"Title: {result['title']}\nContent: {result['body']}"
            for result in search_results
        )

        prompt = f"""Based on the following web search results about '{query}', please create a comprehensive news article.
Include key facts, relevant statistics, and expert opinions if available.
Ensure the article is well-structured with an introduction, main body, and conclusion.
Cite sources directly within the generated text and not at the end of the generated text, integrating URLs where appropriate to support the information provided:

{context}

Article:"""

        # DDGS().chat performs a network request; any failure falls through
        # to the except branch below and is reported as a string.
        summary = DDGS().chat(prompt, model="llama-3-70b")
        return summary
    except Exception as e:
        return f"An error occurred during summarization: {str(e)}"
44
 
45
def rephrase_query(previous_response: str, new_query: str) -> str:
    """Rewrite *new_query* into a single-line, web-search-optimized query.

    When *previous_response* is non-empty it is supplied to the model as
    conversational context so follow-up questions stay on topic.

    Args:
        previous_response: The assistant's last reply, or "" for no context.
        new_query: The user's latest message.

    Returns:
        The rephrased query as a single stripped line of text.

    Raises:
        Propagates any huggingface_hub / network error raised by the
        InferenceClient call.
    """
    client = InferenceClient(
        "mistralai/Mistral-7B-Instruct-v0.3",
        token=huggingface_token,
    )

    if previous_response:
        prompt = f"""Analyze the following previous response to understand the context:

{previous_response}

Now, given the new query:

{new_query}

Rephrase the new query to ensure it aligns with the context of the previous response. The rephrased query should be specific, concise, and optimized for a web search. Provide the rephrased query as a single-line response:"""
    else:
        prompt = f"""Given the new query:

{new_query}

Rephrase this query to make it more specific, concise, and optimized for a web search. Provide the rephrased query as a single-line response:"""

    response = client.text_generation(prompt, max_new_tokens=30, temperature=0.3)

    # The model output may end with a trailing newline, in which case
    # response.split("\n")[-1] is the empty string and the previous code
    # returned an empty query.  Take the last NON-empty line instead,
    # falling back to the whole stripped response.
    lines = [line.strip() for line in response.split("\n") if line.strip()]
    return lines[-1] if lines else response.strip()
74
+
75
def respond(message, chat_history, model, temperature, num_api_calls):
    """Chat handler: rephrase -> web search -> summarize, num_api_calls times.

    Args:
        message: The user's latest message.
        chat_history: List of (user, assistant) pairs; the last assistant
            reply is used as context when rephrasing the query.
        model: Forwarded to summarize_results().
        temperature: Currently unused.  TODO(review): forward it to the
            generation call or remove the control from the UI.
        num_api_calls: Number of search/summarize rounds to run.

    Returns:
        The concatenated round summaries, or a fallback message when
        nothing could be generated.
    """
    rephrased_query = message
    if chat_history:
        # Use the previous assistant reply so follow-up questions keep
        # their conversational context.
        previous_response = chat_history[-1][1]
        rephrased_query = rephrase_query(previous_response, message)

    print(f"Initial Rephrased Query: {rephrased_query}")

    # Accumulate per-round output in a list and join once at the end
    # instead of repeated string concatenation.
    parts = []
    for _ in range(num_api_calls):
        if not rephrased_query or not isinstance(rephrased_query, str):
            print(f"Invalid rephrased query: {rephrased_query}")
            break

        search_results = get_web_search_results(rephrased_query)

        # If no results or an error came back, rephrase once (without
        # context) and retry the search.
        if not search_results or "error" in search_results[0]:
            print(f"No results found for: {rephrased_query}. Attempting to rephrase.")
            rephrased_query = rephrase_query("", rephrased_query)
            print(f"New Rephrased Query: {rephrased_query}")
            search_results = get_web_search_results(rephrased_query)

        if not search_results:
            parts.append(f"No search results found for the query: {rephrased_query}\n\n")
        elif "error" in search_results[0]:
            parts.append(search_results[0]["error"] + "\n\n")
        else:
            summary = summarize_results(rephrased_query, search_results, model)
            parts.append(summary + "\n\n")

    final_summary = "".join(parts)
    return final_summary if final_summary else "Unable to generate a response. Please try a different query."
107
+
108
+ #def initial_conversation():
109
+ # return [
110
+ # (None, "Welcome! I'm your AI-powered Web Search and PDF Chat Assistant. I can help you find information on the web, summarize content, and analyze PDF documents. What would you like to know?")
111
+ # ]
112
 
113
  css = """
114
  Your custom CSS here
 
161
  likeable=True,
162
  layout="bubble",
163
  height=400,
164
+ # value=initial_conversation()
165
  )
166
  )
167