Spaces:

Shreyas094
/

WebSearchGPT

Build error

App Files Files Community

Shreyas094 committed on Aug 14, 2024

Commit

e13157e

verified ·

1 Parent(s): e1d6596

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -62

app.py CHANGED Viewed

@@ -1,21 +1,8 @@
 import gradio as gr
 from duckduckgo_search import DDGS
-from typing import List, Dict
 from huggingface_hub import InferenceClient
 import os
-from langchain.chains import ConversationalRetrievalChain
-from langchain.memory import ConversationBufferMemory
-from langchain.llms import HuggingFacePipeline
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import FAISS
-from langchain.schema import Document
-from transformers import pipeline
-from langchain.llms import HuggingFaceHub
-from langchain.llms import HuggingFaceHub
-from langchain_core.retrievers import BaseRetriever
-from pydantic import BaseModel, Field
-from typing import List
-from typing import List, Dict, Any
 # Environment variables and configurations
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
@@ -36,63 +23,92 @@ def get_web_search_results(query: str, max_results: int = 10) -> List[Dict[str,
         print(f"An error occurred during web search: {str(e)}")
         return [{"error": f"An error occurred during web search: {str(e)}"}]
-class DuckDuckGoRetriever(BaseRetriever, BaseModel):
-    embeddings: Any = Field(description="The embeddings to use for the retriever")
-    vectorstore: Any = Field(default_factory=lambda: None, description="The vector store to use for the retriever")
-    class Config:
-        arbitrary_types_allowed = True
-    def __init__(self, **data):
-        super().__init__(**data)
-        self.vectorstore = FAISS.from_texts(["Initial document"], self.embeddings)
-    def get_relevant_documents(self, query: str) -> List[Document]:
-        search_results = get_web_search_results(query)
-        docs = [Document(page_content=f"Title: {result['title']}\nContent: {result['body']}",
-                         metadata={"source": result['href']}) for result in search_results]
-        # Update the vector store with new documents
-        self.vectorstore.add_documents(docs)
-        # Perform similarity search to get most relevant documents
-        return self.vectorstore.similarity_search(query, k=3)
-    async def aget_relevant_documents(self, query: str) -> List[Document]:
-        return self.get_relevant_documents(query)
-def setup_retrieval_chain(model_name):
-    # Set up the language model using HuggingFaceHub
-    llm = HuggingFaceHub(
-        repo_id=model_name,
-        model_kwargs={"temperature": 0.7, "max_length": 512},
-        huggingfacehub_api_token=huggingface_token
-    )
-    # Set up the embeddings
-    embeddings = HuggingFaceEmbeddings()
-    # Create the DuckDuckGo retriever
-    retriever = DuckDuckGoRetriever(embeddings=embeddings)
-    # Set up the memory
-    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-    # Create the conversational chain
-    qa = ConversationalRetrievalChain.from_llm(
-        llm=llm,
-        retriever=retriever,
-        memory=memory
     )
-    return qa
-def respond(message, chat_history, model, temperature, num_api_calls):
-    qa_chain = setup_retrieval_chain(model)
-    result = qa_chain({"question": message})
-    return result['answer']
 css = """
 Your custom CSS here
@@ -145,6 +161,7 @@ demo = gr.ChatInterface(
         likeable=True,
         layout="bubble",
         height=400,
     )
 )

 import gradio as gr
 from duckduckgo_search import DDGS
+from typing import List, Dict, Tuple
 from huggingface_hub import InferenceClient
 import os
 # Environment variables and configurations
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
         print(f"An error occurred during web search: {str(e)}")
         return [{"error": f"An error occurred during web search: {str(e)}"}]
+def summarize_results(query: str, search_results: List[Dict[str, str]], model: str) -> str:
+    """Ask the DuckDuckGo AI chat to write a news-style article from search results.
+
+    Args:
+        query: The search query the results were fetched for; quoted in the prompt.
+        search_results: Result dicts; each must provide 'title' and 'body', and
+            may provide 'href' (the result URL).
+        model: Currently not forwarded to the chat call (see NOTE below).
+
+    Returns:
+        The generated article, or a string starting with
+        "An error occurred during summarization:" if anything fails.
+    """
+    try:
+        # The prompt asks for inline URL citations, so each result's link has to
+        # be part of the context; without it the model can only invent URLs.
+        context = "\n\n".join(
+            f"Title: {result['title']}\nContent: {result['body']}\nSource: {result.get('href', 'unknown')}"
+            for result in search_results
+        )
+        prompt = f"""Based on the following web search results about '{query}', please create a comprehensive news article.
+        Include key facts, relevant statistics, and expert opinions if available.
+        Ensure the article is well-structured with an introduction, main body, and conclusion.
+        Cite sources directly within the generated text and not at the end of the generated text, integrating URLs where appropriate to support the information provided:
+        {context}
+        Article:"""
+        # NOTE(review): the chat model is fixed to "llama-3-70b" and the `model`
+        # argument is ignored. Confirm the caller's model names are valid for
+        # DDGS chat before wiring the parameter through.
+        summary = DDGS().chat(prompt, model="llama-3-70b")
+        return summary
+    except Exception as e:
+        # Best-effort boundary: report the failure as text so the chat UI can show it.
+        return f"An error occurred during summarization: {str(e)}"
+def rephrase_query(previous_response: str, new_query: str) -> str:
+    """Rewrite a user query into a concise web-search query using a hosted LLM.
+
+    Args:
+        previous_response: The assistant's previous reply, used as context.
+            When falsy, the query is rephrased without any context.
+        new_query: The query to rephrase.
+
+    Returns:
+        The last non-empty line of the model's answer, or ``new_query``
+        unchanged when the model call fails or yields no usable text.
+    """
+    client = InferenceClient(
+        "mistralai/Mistral-7B-Instruct-v0.3",
+        token=huggingface_token,
     )
+    if previous_response:
+        prompt = f"""Analyze the following previous response to understand the context:
+        {previous_response}
+        Now, given the new query:
+        {new_query}
+        Rephrase the new query to ensure it aligns with the context of the previous response. The rephrased query should be specific, concise, and optimized for a web search. Provide the rephrased query as a single-line response:"""
+    else:
+        prompt = f"""Given the new query:
+        {new_query}
+        Rephrase this query to make it more specific, concise, and optimized for a web search. Provide the rephrased query as a single-line response:"""
+    try:
+        response = client.text_generation(prompt, max_new_tokens=30, temperature=0.3)
+    except Exception as e:
+        # Same best-effort style as the other helpers: log the failure and keep
+        # the conversation going with the query exactly as the user typed it.
+        print(f"An error occurred during query rephrasing: {str(e)}")
+        return new_query
+    # Extract only the rephrased query from the response. Blank lines are
+    # skipped so a trailing newline in the generation does not produce an
+    # empty query.
+    candidate_lines = [line.strip() for line in response.splitlines() if line.strip()]
+    return candidate_lines[-1] if candidate_lines else new_query
+def respond(message, chat_history, model, temperature, num_api_calls):
+    """Answer a chat message by searching the web and summarizing the results.
+
+    Args:
+        message: The user's latest message.
+        chat_history: Earlier turns. When non-empty, the second element of the
+            last entry is treated as the previous response and used as context
+            for rephrasing the message.
+        model: Passed through to ``summarize_results``.
+        temperature: Accepted for interface compatibility; not used here.
+        num_api_calls: Number of search-and-summarize rounds to run.
+
+    Returns:
+        The summaries and/or error notes of all rounds, each followed by a
+        blank line, or a fallback message when nothing was produced.
+    """
+    rephrased_query = message
+    if chat_history:
+        previous_response = chat_history[-1][1]
+        rephrased_query = rephrase_query(previous_response, message)
+    print(f"Initial Rephrased Query: {rephrased_query}")
+    sections = []
+    # The count may arrive as a float (e.g. from a numeric UI control, not
+    # visible here); range() only accepts integers.
+    for _ in range(int(num_api_calls)):
+        if not rephrased_query or not isinstance(rephrased_query, str):
+            print(f"Invalid rephrased query: {rephrased_query}")
+            break
+        search_results = get_web_search_results(rephrased_query)
+        # If no results or error, try rephrasing and searching again
+        if not search_results or "error" in search_results[0]:
+            print(f"No results found for: {rephrased_query}. Attempting to rephrase.")
+            retry_query = rephrase_query("", rephrased_query)  # Rephrase without context
+            print(f"New Rephrased Query: {retry_query}")
+            # Only search again with a usable query; otherwise keep the current
+            # query and report the failure of the first attempt below.
+            if retry_query and isinstance(retry_query, str):
+                rephrased_query = retry_query
+                search_results = get_web_search_results(rephrased_query)
+        if not search_results:
+            sections.append(f"No search results found for the query: {rephrased_query}\n\n")
+        elif "error" in search_results[0]:
+            sections.append(search_results[0]["error"] + "\n\n")
+        else:
+            summary = summarize_results(rephrased_query, search_results, model)
+            sections.append(summary + "\n\n")
+    final_summary = "".join(sections)
+    return final_summary if final_summary else "Unable to generate a response. Please try a different query."
+#def initial_conversation():
+#    return [
+#        (None, "Welcome! I'm your AI-powered Web Search and PDF Chat Assistant. I can help you find information on the web, summarize content, and analyze PDF documents. What would you like to know?")
+#    ]
 css = """
 Your custom CSS here
         likeable=True,
         layout="bubble",
         height=400,
+#        value=initial_conversation()
     )
 )