Spaces:
Runtime error
Runtime error
| import os | |
| import chainlit as cl | |
| from langchain.memory.buffer import ConversationBufferMemory | |
| from langchain_openai import ChatOpenAI, OpenAI | |
| from langchain.chains import LLMChain | |
| from prompts import default_prompt_template, doctor_prompt_template, default_prompt_template_no_sources, doctor_prompt_template_no_sources | |
| from dotenv import load_dotenv | |
| from chainlit.input_widget import Select, Switch, Slider | |
| from langchain_core.prompts import ChatPromptTemplate, PromptTemplate | |
| from math import exp | |
| import numpy as np | |
| from typing import Any, Dict, List, Tuple | |
| from langchain_core.output_parsers import BaseOutputParser | |
| from difflib import SequenceMatcher | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import nltk | |
| import re | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import json | |
# Module-level chat model shared by the app: Llama 3.3 70B Instruct, reached
# through ChatOpenAI pointed at the Nebius Studio base URL. The API key is read
# from the NEBIUS_API_KEY environment variable (None when it is unset).
# `.bind(logprobs=True)` attaches logprobs=True to the model's invocations.
llm = ChatOpenAI(
    model="meta-llama/Llama-3.3-70B-Instruct",
    temperature=0.7,
    api_key=os.environ.get("NEBIUS_API_KEY"),
    base_url="https://api.studio.nebius.com/v1/",
).bind(logprobs=True)
def get_wikipedia_page_content(page_title):
    """Fetch the current revision content of an English Wikipedia page.

    Queries the MediaWiki Action API (``prop=revisions``) and returns the
    ``content`` of the ``main`` slot of the first page in the response.

    Args:
        page_title: Page title, typically the last path segment of a
            Wikipedia URL. All whitespace is removed and percent-escapes are
            decoded before the request is made.

    Returns:
        The ``content`` string of the page's main revision slot.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
        KeyError, IndexError: If the response lacks the expected structure
            (for example, the page does not exist).
    """
    from urllib.parse import unquote

    # Drop whitespace first (as before), then decode percent-escapes: titles
    # sliced from a URL may already be encoded, and `params=` below would
    # otherwise encode the '%' a second time.
    page_title = unquote(re.sub(r"\s+", "", page_title))

    # Let requests build the query string so characters such as '&', '+', '#'
    # or '?' in a title cannot corrupt the request.
    params = {
        "action": "query",
        "format": "json",
        "prop": "revisions",
        "titles": page_title,
        "formatversion": "2",
        "rvprop": "content",
        "rvslots": "*",
    }
    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(
        "https://en.wikipedia.org/w/api.php", params=params, timeout=10
    )
    response.raise_for_status()
    data = response.json()
    return data["query"]["pages"][0]["revisions"][0]["slots"]["main"]["content"]
def test_scrape_sim(link, response):
    """Score how similar ``response`` is to the Wikipedia page behind ``link``.

    The text after the last ``/`` in ``link`` is used as the page title, the
    page content is fetched with ``get_wikipedia_page_content``, and the
    TF-IDF cosine similarity between that content and ``response`` is
    returned as a percentage.

    Args:
        link: URL (or bare title) of the Wikipedia page to compare against.
        response: Text to compare with the page content.

    Returns:
        Cosine similarity multiplied by 100, or ``0`` if anything goes wrong
        while fetching or vectorizing (best-effort scoring).
    """
    # NOTE: despite the ``test_`` prefix this is an application helper, not a
    # unit test; the name is kept so existing callers continue to work.
    tfidf_vectorizer = TfidfVectorizer()
    try:
        # Everything after the last '/', or the whole string when there is none.
        title = link.rsplit("/", 1)[-1]
        tfidf_matrix = tfidf_vectorizer.fit_transform(
            [get_wikipedia_page_content(title), response]
        )
        cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
        return cosine_sim * 100
    except Exception:
        # Deliberately best-effort: any fetch/parse/vectorizer failure scores 0.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate instead of being silently swallowed.
        return 0
# Path of the JSON settings file shared by the helpers below (relative to the
# current working directory).
config_file = "config.json"


def get_config():
    """Read ``config_file`` and return its parsed JSON content.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Explicit UTF-8 so parsing does not depend on the platform's locale.
    with open(config_file, "r", encoding="utf-8") as file:
        return json.load(file)


def update_config(new_value):
    """Set ``num_sources`` to ``new_value`` in the config file.

    The current config is loaded, the ``num_sources`` key is overwritten and
    the whole document is written back with 4-space indentation. Other keys
    are preserved.
    """
    config = get_config()
    config["num_sources"] = new_value
    with open(config_file, "w", encoding="utf-8") as file:
        json.dump(config, file, indent=4)


def load_config():
    """Return the parsed config; kept as an alias of ``get_config``.

    Delegates to ``get_config`` so both entry points always read the same
    file instead of each hard-coding its own path.
    """
    return get_config()
def generate_hypothetical_answer(question: str) -> str:
    """Ask the LLM for a hypothetical answer to ``question``.

    The question is substituted into a fixed prompt, and the rendered prompt
    is passed to ``TransparentGPT_settings.llm.invoke``; whatever that call
    returns is handed back unchanged.

    NOTE(review): the annotation says ``str``, but the actual return type is
    whatever ``invoke`` yields for the configured model -- confirm.
    """
    answer_prompt = PromptTemplate(
        template="""
You are an AI assistant taked with generate a hypothetical answer to the following question. Your answer shoulld be detailed and comprehensive,
as if you had access to all relevant information. This hypothetical answer will be used to improve document retrieval, so include key terms and concepts
that might be relevant. Do not include phrases like "I think" or "It's possible that" - present the information as if it were factual.
Question:{question}
Hypothetical answer:
""",
        input_variables=['question'],
    )
    rendered_prompt = answer_prompt.format(question=question)
    return TransparentGPT_settings.llm.invoke(rendered_prompt)
def highest_log_prob(vals):
    """Return a confidence percentage derived from token log-probabilities.

    Computes ``exp(mean(logprob)) * 100`` rounded to two decimals. This is
    the geometric mean of the per-token probabilities expressed as a
    percentage, i.e. the inverse of perplexity (higher means more confident),
    not the perplexity itself.

    Args:
        vals: Iterable of mappings, each with a ``'logprob'`` entry, or
            ``None`` when no log-probabilities are available.

    Returns:
        The confidence in the range 0-100, or ``0.0`` when ``vals`` is
        ``None`` or contains no tokens.
    """
    if vals is None:
        return 0.0
    logprobs = [token['logprob'] for token in vals]
    # No tokens means there is nothing to average; report zero confidence
    # rather than dividing by zero.
    if not logprobs:
        return 0.0
    average_log_prob = sum(logprobs) / len(logprobs)
    return np.round(np.exp(average_log_prob) * 100, 2)