# NOTE(review): the lines "Spaces:" / "Build error" above the code were a
# banner scraped from the hosting page (the deployment's build-error status),
# not part of the module source; preserved here as a comment.
import streamlit as st
from typing import List

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOpenAI
# FAISS now lives in langchain_community; the old `langchain.vectorstores`
# path is deprecated (the file already depends on langchain_community).
from langchain_community.vectorstores import FAISS
from langchain_core.messages import SystemMessage, HumanMessage

from utils.database import (
    get_collection_documents,
    get_all_documents,
    get_embeddings_model,
    initialize_qa_system,
)
def generate_document_tags(content: str) -> List[str]:
    """Generate descriptive tags for a document using an LLM.

    Args:
        content: Raw document text. Only the first 2000 characters are sent
            to the model to bound prompt size / token cost.

    Returns:
        A list of non-empty tag strings. On any failure the error is shown
        via ``st.error`` and an empty list is returned (never raises).
    """
    try:
        llm = ChatOpenAI(temperature=0.2, model="gpt-3.5-turbo")
        prompt = """Analyze the following document content and generate relevant tags/keywords.
Focus on key themes, topics, and important terminology.
Return only the tags as a comma-separated list.
Content: {content}"""
        response = llm.invoke([
            SystemMessage(content="You are a document analysis assistant. Generate relevant tags as a comma-separated list only."),
            HumanMessage(content=prompt.format(content=content[:2000])),
        ])
        # Extract the text of the AI message and split on commas.
        tags_text = response.content
        # Filter out blanks: an empty model reply ("".split(',') == ['']),
        # trailing commas, or doubled commas would otherwise produce ""
        # entries in the returned tag list.
        return [tag.strip() for tag in tags_text.split(',') if tag.strip()]
    except Exception as e:
        st.error(f"Error generating tags: {e}")
        return []
def initialize_chat_system(collection_id=None) -> bool:
    """Build the document-chat QA system and stash it in session state.

    Fetches documents (a single collection when ``collection_id`` is given,
    otherwise every stored document), splits them into overlapping chunks,
    embeds the chunks into a FAISS vector store, and records the vector
    store, QA system, and readiness flag on ``st.session_state``.

    Args:
        collection_id: Optional collection to restrict the documents to.

    Returns:
        True on success; False when no documents exist or setup fails
        (failures are reported through ``st.error``, not raised).
    """
    try:
        conn = st.session_state.db_conn
        if collection_id:
            documents = get_collection_documents(conn, collection_id)
        else:
            documents = get_all_documents(conn)

        if not documents:
            st.error("No documents found.")
            return False

        with st.spinner("Processing documents..."):
            embeddings = get_embeddings_model()
            splitter = RecursiveCharacterTextSplitter(
                chunk_size=500,
                chunk_overlap=50,
                length_function=len,
            )

            # Accumulate chunk texts and their per-chunk metadata in
            # parallel lists, ready for FAISS.from_texts.
            texts = []
            metadatas = []
            for doc in documents:
                for piece in splitter.split_text(doc['content']):
                    texts.append(piece)
                    metadatas.append({
                        'source': doc['name'],
                        'document_id': doc['id'],
                        'collection_id': collection_id,
                    })

            vector_store = FAISS.from_texts(texts, embeddings, metadatas)

            # Expose everything the chat UI needs via session state.
            st.session_state.vector_store = vector_store
            st.session_state.qa_system = initialize_qa_system(vector_store)
            st.session_state.chat_ready = True
            return True
    except Exception as e:
        st.error(f"Error initializing chat system: {e}")
        return False