# stdlib
import os
import re
import time
import asyncio
import textwrap
import threading

# third-party
import openai
import chromadb
import gradio as gr
import nest_asyncio
from dotenv import load_dotenv
from openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain_chroma import Chroma
from langchain_core.runnables import RunnableLambda
# NOTE(review): the original also did `from langchain_openai import OpenAI`,
# which was immediately shadowed by `from openai import OpenAI` above and
# therefore never used — removed to resolve the name collision.

# project-local
from utils.central_logging import setup_logging, get_logger
from whisper_singleton import get_embedding, save_file, transcribe_content
from extract_text import pdf_to_documents, store_data
from prompt import get_prompt, get_system_prompt

load_dotenv("./.env")
setup_logging()
logger = get_logger("chat")

# Module-level RAG state, populated by handle_upload() and read by
# stream_response(). None until a file has been ingested.
_embedding = None
_retriever = None
_vectore_store = None

openai_api_key = os.getenv("OPENAI_API_KEY")
if openai_api_key:
    logger.info("Open ai api key has been set")
else:
    logger.error("No open ai api key has been found")

try:
    llm_openai = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)
    client = OpenAI()
    logger.info("Clients has been initialized")
except Exception as e:
    logger.exception(f"An exception occured: {e}")


def handle_upload(file_path):
    """Ingest an uploaded file into the vector store.

    Supports PDF (text extraction), MP3 and MP4 (Whisper transcription).
    On success the module-level ``_vectore_store`` and ``_retriever`` are
    (re)bound so that stream_response() can do retrieval.

    Args:
        file_path: Path to the uploaded file.

    Returns:
        Tuple ``(status_message, text_content)`` where ``text_content`` is
        the extracted/transcribed text (empty string on failure).
    """
    # BUG FIX: the original only declared _embedding and _retriever as
    # global; _vectore_store was an accidental local, so the module-level
    # store was never updated, and on the "invalid format" or exception
    # paths the trailing `_vectore_store.as_retriever()` raised
    # UnboundLocalError instead of returning the status message.
    global _embedding, _retriever, _vectore_store

    _embedding = get_embedding()
    text_content = ""
    status_message = ""
    file_name = "./transcribe.txt"  # target for the (currently disabled) save_file calls
    new_store = None
    lowered = file_path.lower()

    try:
        if lowered.endswith(".pdf"):
            collection_name = "pdffiles"
            pdf_docs, new_store = pdf_to_documents(
                file_path, "transcribe_db", collection_name, _embedding
            )
            text_content = "\n\n".join([doc.page_content for doc in pdf_docs])
            status_message = "📄 PDF file uploaded — extraction implemented."
            logger.info(status_message)
            # save_file(file_name, text_content)
        elif lowered.endswith((".mp3", ".mp4")):
            print(f"path:{file_path}")
            if lowered.endswith(".mp3"):
                collection_name = "audios"
                status_message = "🎧 MP3 uploaded — transcription implemented."
                logger.info(status_message)
            else:
                collection_name = "videos"
                status_message = "🎬 MP4 uploaded — video transcription implemented."
                logger.info(status_message)
            text_content = transcribe_content(file_path)
            new_store = store_data(
                text_content, "transcribe_db", collection_name, _embedding
            )
            # save_file(file_name, text_content)
        else:
            status_message = "Invalid file format"
    except Exception as e:
        status_message = f"❌ Error processing file: {e}"
        logger.exception(status_message)

    # Only rebind the retriever when ingestion actually produced a store;
    # otherwise keep whatever was loaded previously.
    if new_store is not None:
        _vectore_store = new_store
        _retriever = _vectore_store.as_retriever()

    return status_message, text_content


def stream_response(user_input, history):
    """Stream an assistant reply token-by-token for the Gradio chat UI.

    Appends the user turn and an (initially empty) assistant turn to
    ``history``, retrieves context from ``_retriever`` when a file has been
    ingested, and yields ``(history, history, "")`` after every streamed
    token so the UI updates incrementally.

    Args:
        user_input: The user's latest message.
        history: Chat history as a list of {"role", "content"} dicts
            (``None`` is treated as an empty history).

    Yields:
        Tuple ``(history, history, "")`` — duplicated for the two Gradio
        outputs, with an empty string to clear the input box.
    """
    history = history or []
    history.append({"role": "user", "content": user_input})
    history.append({"role": "assistant", "content": ""})

    # Retrieval context is optional — empty until handle_upload() has run.
    context = ""
    if _retriever is not None:
        docs = _retriever.invoke(user_input)
        context = "\n\n".join([d.page_content for d in docs])

    # NOTE: includes the just-appended user turn and the empty assistant
    # turn; the prompt template is expected to tolerate that.
    formatted_history = "\n".join(
        f"{m['role'].capitalize()}: {m['content']}" for m in history
    )

    system_prompt = get_system_prompt().format(
        history=formatted_history,
        context=context,
        user_message=user_input
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]

    partial_reply = ""
    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        stream=True,
        temperature=0
    )
    for event in stream:
        delta = event.choices[0].delta
        if delta and delta.content:
            partial_reply += delta.content
            history[-1]["content"] = partial_reply
            yield history, history, ""

    # Final yield so the UI settles even if the stream produced no tokens.
    history[-1]["content"] = partial_reply
    yield history, history, ""