Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| from sentence_transformers import SentenceTransformer | |
| import torch | |
| import numpy as np | |
# Load the pre-formatted knowledge base that was uploaded with the Space.
with open("uni_dataset_embedding_friendly.txt", "r", encoding="utf-8") as file:
    uni_dataset_text = file.read()
print("success")

# Split the document on its "---" separator lines, discarding empty fragments.
chunks = [chunk.strip() for chunk in uni_dataset_text.split("\n---\n") if chunk.strip()]

# Embed every chunk once at startup; L2-normalising the rows means a dot
# product with a normalised query vector is exactly cosine similarity.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=True)
norm_chunk_embeddings = torch.nn.functional.normalize(chunk_embeddings, dim=1)
def get_relevant_context(query, top_k=3):
    """Return the dataset chunks most similar to *query*.

    Returns a pair ``(selected, indices)`` where ``selected`` is a list of
    up to ``top_k`` chunk strings and ``indices`` are their positions in the
    module-level ``chunks`` list, ordered by descending cosine similarity.
    """
    # Encode and normalise the query so the product below is cosine similarity.
    q_vec = embedder.encode(query, convert_to_tensor=True)
    q_vec = torch.nn.functional.normalize(q_vec, dim=0)
    scores = norm_chunk_embeddings @ q_vec
    # Never request more results than there are chunks.
    k = min(top_k, scores.shape[0])
    best = torch.topk(scores, k=k).indices.cpu().tolist()
    return [chunks[i] for i in best], best
# Hosted inference endpoint used to generate replies.
client = InferenceClient("microsoft/phi-4")

def respond(message, history):
    """Gradio chat callback: answer *message*, grounded in the dataset.

    Parameters
    ----------
    message : str
        The user's latest input.
    history : list | None
        Prior turns in OpenAI-style {"role", "content"} dicts
        (the ChatInterface is created with type="messages").

    Returns
    -------
    str
        The assistant's reply text, stripped of surrounding whitespace.
    """
    # FIX: the original never called get_relevant_context, so the chunk
    # embeddings computed at startup were dead code and the bot ignored the
    # dataset entirely.  Retrieve relevant chunks and ground the answer.
    context_chunks, _ = get_relevant_context(message)
    context = "\n---\n".join(context_chunks)
    messages = [{
        "role": "system",
        "content": (
            "you are a realistic and friendly environmental activist whos aim is to "
            "promote the use of renewable energy."
            "\n\nUse the following reference material when it is relevant:\n"
            + context
        ),
    }]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})
    response = client.chat_completion(
        messages=messages,
        temperature=0.0,  # deterministic output for reproducible answers
        max_tokens=500,
        top_p=1.0,
    )
    return response['choices'][0]['message']['content'].strip()
# Chat UI: ChatInterface manages the conversation history and user input box.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    title="Watt the bot?",
)
chatbot.launch()