Spaces:
Build error
Build error
File size: 2,763 Bytes
import gradio as gr
import faiss
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
import numpy as np
# Load the FAISS index that is queried for nearest-neighbour document lookups.
index_path = "faiss_index/index.faiss"  # Update with your FAISS index file path
index = faiss.read_index(index_path)

# Load the document metadata (rows are addressed by the positions FAISS returns).
# NOTE(security): unpickling can execute arbitrary code embedded in the file;
# only load a pickle that comes from a trusted source.
df = pd.read_pickle('df_news (1).pkl')

# Sentence-embedding model used to encode queries before searching the index.
model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Tokenizer and language model used for answer generation.
# AutoModelForCausalLM is the class this file imports, and it is the Auto class
# that exposes a language-modelling head for ``.generate()``.
# NOTE(review): 'BAAI/bge-large-zh-v1.5' appears to be an embedding checkpoint
# rather than a generative one - confirm this is the intended generation model.
hf_tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-large-zh-v1.5')
hf_model = AutoModelForCausalLM.from_pretrained('BAAI/bge-large-zh-v1.5')
# Define the function for similarity search
def search(query, k=10):
    """Return metadata for the documents nearest to ``query`` in the FAISS index.

    Args:
        query: Text to encode with the module-level sentence-embedding model.
        k: Maximum number of neighbours to request from the index.

    Returns:
        A list of dicts with the keys ``title``, ``author``, ``content`` and
        ``source``, in the order the index returned the neighbours. The list
        may hold fewer than ``k`` entries.
    """
    # The index is searched with a batch of one float32 query vector.
    query_embedding = model.encode(query).astype('float32')
    _, neighbour_ids = index.search(np.array([query_embedding]), k)

    row_count = len(df)
    results = []
    for idx in neighbour_ids[0]:
        # FAISS fills unused result slots with -1. Without the lower bound,
        # ``df.iloc[-1]`` would silently return the last row as a bogus hit.
        if not 0 <= idx < row_count:
            continue
        doc = df.iloc[idx]
        results.append({
            'title': doc['title'],
            'author': doc['author'],
            'content': doc['full_text'],
            'source': doc['url'],
        })
    return results
# Define the function to generate a response based on the retrieved documents
def generate_answer(query, max_tokens, temperature, top_p):
    """Generate a response to ``query`` grounded in retrieved documents.

    Args:
        query: The user's question.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature passed to ``generate``.
        top_p: Nucleus-sampling probability mass passed to ``generate``.

    Returns:
        The decoded output sequence with special tokens removed.
    """
    # Retrieve supporting documents and fold them into the prompt.
    search_results = search(query)
    context = "\n\n".join(
        f"Title: {doc['title']}\nContent: {doc['content']}"
        for doc in search_results
    )
    full_prompt = f"Context:\n{context}\n\nQuestion: {query}"

    inputs = hf_tokenizer(full_prompt, return_tensors="pt")

    output = hf_model.generate(
        inputs["input_ids"],
        # ``max_length`` would count the prompt tokens too, and the retrieved
        # context alone can exceed the budget; the limit is meant for new
        # tokens only. UI sliders may deliver a float, so coerce to int.
        max_new_tokens=int(max_tokens),
        # temperature / top_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=hf_tokenizer.eos_token_id,
    )

    return hf_tokenizer.decode(output[0], skip_special_tokens=True)
# Define the Gradio interface
def respond(message, max_tokens, temperature, top_p):
    """Gradio callback: forward the UI inputs to ``generate_answer``."""
    return generate_answer(
        query=message,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
# Set up the Gradio demo
# Input widgets, listed in the same order as the parameters of ``respond``.
_input_widgets = [
    gr.Textbox(value="What is the latest news?", label="Query"),
    gr.Slider(minimum=1, maximum=2048, value=150, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.1, label="Top-p (nucleus sampling)"),
]

# A single text box displays the generated response.
demo = gr.Interface(fn=respond, inputs=_input_widgets, outputs=[gr.Textbox()])

# Start the web UI only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
|