Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,21 +1,8 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from duckduckgo_search import DDGS
|
| 3 |
-
from typing import List, Dict
|
| 4 |
from huggingface_hub import InferenceClient
|
| 5 |
import os
|
| 6 |
-
from langchain.chains import ConversationalRetrievalChain
|
| 7 |
-
from langchain.memory import ConversationBufferMemory
|
| 8 |
-
from langchain.llms import HuggingFacePipeline
|
| 9 |
-
from langchain.embeddings import HuggingFaceEmbeddings
|
| 10 |
-
from langchain.vectorstores import FAISS
|
| 11 |
-
from langchain.schema import Document
|
| 12 |
-
from transformers import pipeline
|
| 13 |
-
from langchain.llms import HuggingFaceHub
|
| 14 |
-
from langchain.llms import HuggingFaceHub
|
| 15 |
-
from langchain_core.retrievers import BaseRetriever
|
| 16 |
-
from pydantic import BaseModel, Field
|
| 17 |
-
from typing import List
|
| 18 |
-
from typing import List, Dict, Any
|
| 19 |
|
| 20 |
# Environment variables and configurations
|
| 21 |
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
|
|
@@ -36,63 +23,92 @@ def get_web_search_results(query: str, max_results: int = 10) -> List[Dict[str,
|
|
| 36 |
print(f"An error occurred during web search: {str(e)}")
|
| 37 |
return [{"error": f"An error occurred during web search: {str(e)}"}]
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
class Config:
|
| 44 |
-
arbitrary_types_allowed = True
|
| 45 |
-
|
| 46 |
-
def __init__(self, **data):
|
| 47 |
-
super().__init__(**data)
|
| 48 |
-
self.vectorstore = FAISS.from_texts(["Initial document"], self.embeddings)
|
| 49 |
-
|
| 50 |
-
def get_relevant_documents(self, query: str) -> List[Document]:
|
| 51 |
-
search_results = get_web_search_results(query)
|
| 52 |
-
docs = [Document(page_content=f"Title: {result['title']}\nContent: {result['body']}",
|
| 53 |
-
metadata={"source": result['href']}) for result in search_results]
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
async def aget_relevant_documents(self, query: str) -> List[Document]:
|
| 62 |
-
return self.get_relevant_documents(query)
|
| 63 |
-
|
| 64 |
-
def setup_retrieval_chain(model_name):
|
| 65 |
-
# Set up the language model using HuggingFaceHub
|
| 66 |
-
llm = HuggingFaceHub(
|
| 67 |
-
repo_id=model_name,
|
| 68 |
-
model_kwargs={"temperature": 0.7, "max_length": 512},
|
| 69 |
-
huggingfacehub_api_token=huggingface_token
|
| 70 |
-
)
|
| 71 |
-
|
| 72 |
-
# Set up the embeddings
|
| 73 |
-
embeddings = HuggingFaceEmbeddings()
|
| 74 |
-
|
| 75 |
-
# Create the DuckDuckGo retriever
|
| 76 |
-
retriever = DuckDuckGoRetriever(embeddings=embeddings)
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
memory=memory
|
| 86 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
return qa
|
| 89 |
|
| 90 |
-
|
| 91 |
-
qa_chain = setup_retrieval_chain(model)
|
| 92 |
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
css = """
|
| 98 |
Your custom CSS here
|
|
@@ -145,6 +161,7 @@ demo = gr.ChatInterface(
|
|
| 145 |
likeable=True,
|
| 146 |
layout="bubble",
|
| 147 |
height=400,
|
|
|
|
| 148 |
)
|
| 149 |
)
|
| 150 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from duckduckgo_search import DDGS
|
| 3 |
+
from typing import List, Dict, Tuple
|
| 4 |
from huggingface_hub import InferenceClient
|
| 5 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Environment variables and configurations
|
| 8 |
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
|
|
|
|
| 23 |
print(f"An error occurred during web search: {str(e)}")
|
| 24 |
return [{"error": f"An error occurred during web search: {str(e)}"}]
|
| 25 |
|
| 26 |
+
def summarize_results(query: str, search_results: List[Dict[str, str]], model: str) -> str:
    """Turn web search hits into a news-style article via the DDGS chat call.

    Args:
        query: The search query the results belong to; quoted in the prompt
            and in the "nothing to summarize" message.
        search_results: Search hits as dicts. The "title", "body" and "href"
            keys are read; a missing key is rendered as an empty string
            instead of aborting the whole summary.
        model: Not read by this function -- the chat call is pinned to
            "llama-3-70b". Kept in the signature so existing callers work.

    Returns:
        The text returned by the chat call, or a human-readable message when
        there are no results to summarize or the chat call raises.
    """
    if not search_results:
        # With no context the model could only invent an article.
        return f"No search results available to summarize for the query: {query}"

    try:
        # The prompt asks for inline URL citations, so every entry has to
        # carry its link; otherwise the model has no real URLs to cite.
        context = "\n\n".join(
            f"Title: {result.get('title', '')}\n"
            f"Content: {result.get('body', '')}\n"
            f"Source: {result.get('href', '')}"
            for result in search_results
        )

        prompt = f"""Based on the following web search results about '{query}', please create a comprehensive news article.
Include key facts, relevant statistics, and expert opinions if available.
Ensure the article is well-structured with an introduction, main body, and conclusion.
Cite sources directly within the generated text and not at the end of the generated text, integrating URLs where appropriate to support the information provided:

{context}

Article:"""

        summary = DDGS().chat(prompt, model="llama-3-70b")
        return summary
    except Exception as e:
        # Same best-effort style as the web search helper: report, don't crash the UI.
        return f"An error occurred during summarization: {str(e)}"
|
| 44 |
|
| 45 |
+
def rephrase_query(previous_response: str, new_query: str) -> str:
    """Rewrite a user query into a web-search-friendly form using a hosted LLM.

    Args:
        previous_response: The previous assistant reply used as context.
            When falsy, the query is rephrased without any context.
        new_query: The query to rephrase.

    Returns:
        The last non-empty line of the model output. Falls back to
        ``new_query`` unchanged when the query is blank, the inference call
        raises, or the model returns only whitespace.
    """
    if not new_query or not new_query.strip():
        # Nothing to rephrase; avoid a pointless remote call.
        return new_query

    if previous_response:
        prompt = f"""Analyze the following previous response to understand the context:

{previous_response}

Now, given the new query:

{new_query}

Rephrase the new query to ensure it aligns with the context of the previous response. The rephrased query should be specific, concise, and optimized for a web search. Provide the rephrased query as a single-line response:"""
    else:
        prompt = f"""Given the new query:

{new_query}

Rephrase this query to make it more specific, concise, and optimized for a web search. Provide the rephrased query as a single-line response:"""

    try:
        client = InferenceClient(
            "mistralai/Mistral-7B-Instruct-v0.3",
            token=huggingface_token,
        )
        response = client.text_generation(prompt, max_new_tokens=30, temperature=0.3)
    except Exception as e:
        # Rephrasing is an optimisation; searching with the raw query beats failing.
        print(f"An error occurred during query rephrasing: {str(e)}")
        return new_query

    # Extract only the rephrased query from the response. Blank lines are
    # dropped first so a trailing newline does not yield an empty query.
    candidate_lines = [line.strip() for line in response.splitlines() if line.strip()]
    return candidate_lines[-1] if candidate_lines else new_query
|
| 74 |
+
|
| 75 |
+
def respond(message, chat_history, model, temperature, num_api_calls):
    """Answer a chat message by searching the web and summarizing the hits.

    Args:
        message: The user's new message; used as the search query when there
            is no chat history.
        chat_history: Prior turns. When non-empty, index 1 of the last entry
            is passed to ``rephrase_query`` as the previous response.
        model: Forwarded to ``summarize_results``.
        temperature: Not read by this function.
        num_api_calls: Number of search-and-summarize rounds; coerced with
            ``int()`` so an integral float (e.g. from a UI slider -- TODO
            confirm the widget type) does not break ``range``.

    Returns:
        The round outputs, each followed by a blank line, or a fixed fallback
        message when no round produced any output.
    """
    rephrased_query = message
    if chat_history:
        previous_response = chat_history[-1][1]
        rephrased_query = rephrase_query(previous_response, message)

    print(f"Initial Rephrased Query: {rephrased_query}")

    sections = []
    for _ in range(int(num_api_calls)):
        if not _is_usable_query(rephrased_query):
            print(f"Invalid rephrased query: {rephrased_query}")
            break

        search_results = get_web_search_results(rephrased_query)

        # If no results or error, try rephrasing and searching again
        if not search_results or "error" in search_results[0]:
            print(f"No results found for: {rephrased_query}. Attempting to rephrase.")
            retry_query = rephrase_query("", rephrased_query)  # Rephrase without context
            print(f"New Rephrased Query: {retry_query}")
            # Only retry with a usable query; otherwise keep the current one
            # so an empty rephrase never turns into an empty web search.
            if _is_usable_query(retry_query):
                rephrased_query = retry_query
                search_results = get_web_search_results(rephrased_query)

        if not search_results:
            sections.append(f"No search results found for the query: {rephrased_query}")
        elif "error" in search_results[0]:
            sections.append(search_results[0]["error"])
        else:
            sections.append(summarize_results(rephrased_query, search_results, model))

    if not sections:
        return "Unable to generate a response. Please try a different query."
    # Each section is followed by a blank line, matching the previous output format.
    return "".join(f"{section}\n\n" for section in sections)


def _is_usable_query(query):
    """Return True when *query* is a non-empty string that can be searched."""
    return isinstance(query, str) and bool(query)
|
| 107 |
+
|
| 108 |
+
#def initial_conversation():
|
| 109 |
+
# return [
|
| 110 |
+
# (None, "Welcome! I'm your AI-powered Web Search and PDF Chat Assistant. I can help you find information on the web, summarize content, and analyze PDF documents. What would you like to know?")
|
| 111 |
+
# ]
|
| 112 |
|
| 113 |
css = """
|
| 114 |
Your custom CSS here
|
|
|
|
| 161 |
likeable=True,
|
| 162 |
layout="bubble",
|
| 163 |
height=400,
|
| 164 |
+
# value=initial_conversation()
|
| 165 |
)
|
| 166 |
)
|
| 167 |
|