| import os |
| from utils.central_logging import setup_logging,get_logger |
| import textwrap |
| from langchain_openai import OpenAI |
| from langchain_chroma import Chroma |
| |
| from dotenv import load_dotenv |
| import os |
| import openai |
|
|
|
|
| from langchain_openai import ChatOpenAI |
| from langchain_core.runnables import RunnableLambda |
| import chromadb |
|
|
| import gradio as gr |
| import time |
| import asyncio |
| import nest_asyncio |
| import threading |
| import re |
| from openai import OpenAI |
| |
|
|
| from whisper_singleton import get_embedding,save_file,transcribe_content |
| from extract_text import pdf_to_documents,store_data |
| from prompt import get_prompt,get_system_prompt |
|
|
|
|
# --- Module-level initialisation (runs once at import time) ---

# Load environment variables (OPENAI_API_KEY etc.) from the local .env file.
load_dotenv("./.env")


# Configure project-wide logging once, then grab a named logger for this module.
setup_logging()
logger = get_logger("chat")


# Lazily-populated globals shared between handle_upload() and stream_response().
_embedding = None       # embedding model, created on first upload
_retriever = None       # retriever over the current vector store
_vectore_store = None   # most recently built vector store


openai_api_key = os.getenv("OPENAI_API_KEY")


if openai_api_key:
    logger.info("Open ai api key has been set")
else:
    logger.error("No open ai api key has been found")


try:
    # Deterministic (temperature=0) chat model for LangChain use, plus the raw
    # OpenAI client used for streaming completions in stream_response().
    llm_openai = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)
    client = OpenAI()
    logger.info("Clients has been initialized")
except Exception as e:
    # NOTE(review): the failure is logged but not re-raised, so `llm_openai`
    # and `client` stay undefined and later calls will hit a NameError —
    # confirm whether a hard failure at startup would be preferable.
    logger.exception(f"An exception occurred: {e}")
|
|
|
|
|
|
def handle_upload(file_path):
    """Ingest an uploaded PDF / MP3 / MP4 file into the vector store.

    PDF files are split into documents and embedded directly; MP3/MP4 files
    are transcribed first and the transcript is embedded.  On success the
    module-level ``_retriever`` is refreshed so subsequent chat turns can
    ground answers in the newly-uploaded content.

    Args:
        file_path: Path to the uploaded file.

    Returns:
        tuple[str, str]: ``(status_message, extracted_or_transcribed_text)``.
    """
    # Bug fix: the original omitted `_vectore_store` from the global
    # declarations, so the module global was never updated and the
    # `.as_retriever()` call below raised UnboundLocalError on the
    # invalid-format and error paths.
    global _embedding, _retriever, _vectore_store

    _embedding = get_embedding()
    text_content = ""
    status_message = ""
    try:
        lower_path = file_path.lower()
        if lower_path.endswith(".pdf"):
            collection_name = "pdffiles"
            pdf_docs, _vectore_store = pdf_to_documents(
                file_path, "transcribe_db", collection_name, _embedding
            )
            text_content = "\n\n".join(doc.page_content for doc in pdf_docs)
            status_message = "π PDF file uploaded β extraction implemented."
            logger.info(status_message)

        elif lower_path.endswith((".mp3", ".mp4")):
            # Use the module logger instead of a bare print for consistency.
            logger.info("path:%s", file_path)
            if lower_path.endswith(".mp3"):
                collection_name = "audios"
                status_message = "π§ MP3 uploaded β transcription implemented."
                logger.info(status_message)
            else:
                collection_name = "videos"
                status_message = "π¬ MP4 uploaded β video transcription implemented."
                logger.info(status_message)

            text_content = transcribe_content(file_path)
            _vectore_store = store_data(text_content, "transcribe_db", collection_name, _embedding)

        else:
            status_message = "Invalid file format"
    except Exception as e:
        status_message = f"β Error processing file: {e}"
        logger.exception(status_message)

    # Only refresh the retriever when a vector store was actually produced;
    # on the invalid-format or error paths there is nothing to retrieve from.
    if _vectore_store is not None:
        _retriever = _vectore_store.as_retriever()
    return status_message, text_content
|
|
|
|
|
|
def stream_response(user_input, history):
    """Stream an assistant reply token-by-token for the chat UI.

    Appends the user turn plus an empty assistant turn to ``history``, builds
    a system prompt grounded in retrieved context (when a retriever exists),
    then yields the growing history after every streamed token.

    Yields:
        ``(history, history, "")`` triples consumed by the UI callbacks; the
        assistant turn's content grows with each yield.
    """
    chat_log = history or []

    chat_log.append({"role": "user", "content": user_input})
    chat_log.append({"role": "assistant", "content": ""})

    # Ground the answer in uploaded content when a retriever is available.
    retrieved_context = ""
    if _retriever is not None:
        matched_docs = _retriever.invoke(user_input)
        retrieved_context = "\n\n".join(d.page_content for d in matched_docs)

    # Flatten the conversation into "Role: content" lines for the prompt.
    transcript_lines = []
    for turn in chat_log:
        transcript_lines.append(f"{turn['role'].capitalize()}: {turn['content']}")

    system_prompt = get_system_prompt().format(
        history="\n".join(transcript_lines),
        context=retrieved_context,
        user_message=user_input,
    )

    request_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]

    reply_so_far = ""
    completion_stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=request_messages,
        stream=True,
        temperature=0,
    )

    for chunk in completion_stream:
        piece = chunk.choices[0].delta
        if piece and piece.content:
            reply_so_far += piece.content
            chat_log[-1]["content"] = reply_so_far
            yield chat_log, chat_log, ""

    # Final yield so the UI always receives the completed reply, even when
    # the last chunk carried no content.
    chat_log[-1]["content"] = reply_so_far
    yield chat_log, chat_log, ""
|
|
|
|