# knowbot/ui/chat_handler.py
# NOTE(review): the original file began with GitHub page residue
# ("Decim@97", "Knowbot first commit", "04e75ed") pasted above the code;
# converted to this comment header so the module parses.
import os
from utils.central_logging import setup_logging,get_logger
import textwrap
from langchain_openai import OpenAI
from langchain_chroma import Chroma
#from langchain_community.document_loaders import SeleniumURLLoader
from dotenv import load_dotenv
import os
import openai
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnableLambda
import chromadb
import gradio as gr
import time
import asyncio
import nest_asyncio
import threading
import re
from openai import OpenAI
#import streamlit as st
from whisper_singleton import get_embedding,save_file,transcribe_content
from extract_text import pdf_to_documents,store_data
from prompt import get_prompt,get_system_prompt
# Load environment variables (expects OPENAI_API_KEY in ./.env).
load_dotenv("./.env")
setup_logging()
logger = get_logger("chat")

# Module-level state shared between handle_upload() and stream_response().
_embedding = None      # embedding function, set lazily by handle_upload()
_retriever = None      # vector-store retriever, refreshed after each upload
_vectore_store = None  # NOTE(review): handle_upload() rebinds this as a local
                       # (no `global` declaration), so this global stays None.

openai_api_key = os.getenv("OPENAI_API_KEY")
if openai_api_key:
    logger.info("Open ai api key has been set")
else:
    logger.error("No open ai api key has been found")

try:
    # llm_openai: LangChain chat model (currently unused in this chunk).
    # client: raw OpenAI SDK client used for streaming in stream_response().
    # NOTE(review): `OpenAI` here resolves to `openai.OpenAI` because the
    # L23 import shadows the earlier `langchain_openai.OpenAI` import.
    llm_openai = ChatOpenAI(model='gpt-3.5-turbo',temperature=0)
    client = OpenAI()
    logger.info("Clients has been initialized")
except Exception as e:
    # NOTE(review): the failure is logged but not re-raised, so `client` and
    # `llm_openai` may be undefined later; stream_response() would then fail
    # with NameError rather than a clear startup error.
    logger.exception(f"An exception occured: {e}")
def handle_upload(file_path):
    """Ingest an uploaded file (PDF, MP3, or MP4) into the vector store.

    Extracts text from PDFs or transcribes audio/video, stores the result in
    the "transcribe_db" Chroma database under a format-specific collection,
    and refreshes the module-level retriever used by stream_response().

    Args:
        file_path: Filesystem path of the uploaded file.

    Returns:
        Tuple ``(status_message, text_content)``. ``text_content`` is ``""``
        when the format is unsupported or processing failed.
    """
    global _embedding
    global _retriever
    # Bug fix: _vectore_store was previously rebound as a local, leaving the
    # module-level global permanently None.
    global _vectore_store
    _embedding = get_embedding()
    text_content = ""
    status_message = ""
    # Local handle so we can tell whether ingestion actually succeeded.
    vector_store = None
    try:
        lowered = file_path.lower()
        if lowered.endswith(".pdf"):
            collection_name = "pdffiles"
            pdf_docs, vector_store = pdf_to_documents(file_path, "transcribe_db", collection_name, _embedding)
            text_content = "\n\n".join(doc.page_content for doc in pdf_docs)
            status_message = "📄 PDF file uploaded — extraction implemented."
            logger.info(status_message)
        elif lowered.endswith((".mp3", ".mp4")):
            # Was a bare print(); route through the module logger instead.
            logger.info("path:%s", file_path)
            if lowered.endswith(".mp3"):
                collection_name = "audios"
                status_message = "🎧 MP3 uploaded — transcription implemented."
            else:
                collection_name = "videos"
                status_message = "🎬 MP4 uploaded — video transcription implemented."
            logger.info(status_message)
            text_content = transcribe_content(file_path)
            vector_store = store_data(text_content, "transcribe_db", collection_name, _embedding)
        else:
            status_message = "Invalid file format"
    except Exception as e:
        status_message = f"❌ Error processing file: {e}"
        logger.exception(status_message)
    # Bug fix: only refresh the retriever when ingestion produced a store.
    # Previously `_vectore_store.as_retriever()` ran unconditionally and
    # raised UnboundLocalError on the invalid-format and early-exception
    # paths (the local was never bound).
    if vector_store is not None:
        _vectore_store = vector_store
        _retriever = _vectore_store.as_retriever()
    return status_message, text_content
def stream_response(user_input, history):
    """Stream an assistant reply for *user_input*, token by token.

    Appends the user turn and an empty assistant turn to *history* (the
    caller's list is mutated in place), optionally augments the system
    prompt with retrieved document context, then streams a gpt-4o-mini
    completion. Yields ``(history, history, "")`` after every received
    token so the UI can render the partial reply; the trailing empty
    string clears the input box.
    """
    history = history or []
    history.append({"role": "user", "content": user_input})
    history.append({"role": "assistant", "content": ""})

    # Pull retrieval context only once a document has been ingested.
    if _retriever is None:
        context = ""
    else:
        retrieved = _retriever.invoke(user_input)
        context = "\n\n".join(doc.page_content for doc in retrieved)

    transcript = "\n".join(
        f"{turn['role'].capitalize()}: {turn['content']}" for turn in history
    )
    prompt = get_system_prompt().format(
        history=transcript,
        context=context,
        user_message=user_input
    )

    reply = ""
    completion_stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_input},
        ],
        stream=True,
        temperature = 0
    )
    for chunk in completion_stream:
        piece = chunk.choices[0].delta
        if piece and piece.content:
            reply += piece.content
            history[-1]["content"] = reply
            yield history, history, ""

    # Final yield keeps the UI consistent even when the stream's last
    # event carries no content delta.
    history[-1]["content"] = reply
    yield history, history, ""