from flask import Flask, render_template_string, request, jsonify
import os
from groq import Groq
import re
from pypdf import PdfReader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
app = Flask(__name__)
app.static_folder = 'static'

# SECURITY: a live Groq API key was previously hard-coded on this line. Any
# secret committed to source control must be treated as compromised — rotate
# the key and provide the replacement via the GROQ_API_KEY environment
# variable instead of embedding it in code.
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

# Sentence-transformer model used to embed uploaded documents for retrieval.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Chroma collection holding the chunked text of every uploaded document.
vector_store = Chroma(embedding_function=embeddings, collection_name="doc_collection")
# ~1000-char chunks with 200-char overlap so sentence context survives
# chunk boundaries during retrieval.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# NOTE(review): module-level mutable state — shared across all requests and
# not safe under multiple workers/threads; confirm whether this is intended.
chat_history = []

# Inline HTML template served by the index route (content empty in this view).
HTML_TEMPLATE = """
"""
@app.route('/')
def index():
    """Serve the chat UI by rendering the inline page template."""
    page_markup = HTML_TEMPLATE
    return render_template_string(page_markup)
def process_file(file_obj):
    """Extract text from an uploaded .pdf or .txt file and index it for RAG.

    The extracted text is split into overlapping chunks and added to the
    module-level Chroma ``vector_store`` so later queries can retrieve it
    via similarity search.

    Args:
        file_obj: A Werkzeug ``FileStorage``-like object exposing a
            ``filename`` attribute and a readable binary stream; may be falsy.

    Returns:
        The full extracted text as a string, or ``None`` when no file
        was supplied.

    Raises:
        RuntimeError: If the file cannot be parsed, decoded, or indexed.
            The underlying exception is preserved as ``__cause__``.
    """
    if not file_obj:
        return None
    file_extension = os.path.splitext(file_obj.filename)[1].lower()
    try:
        if file_extension == ".pdf":
            reader = PdfReader(file_obj)
            # extract_text() may return None for image-only pages; treat as "".
            file_text = "\n".join(page.extract_text() or "" for page in reader.pages)
        elif file_extension == ".txt":
            file_text = file_obj.read().decode('utf-8')
        else:
            raise ValueError(f"Unsupported file format: {file_extension}")
        file_docs = [Document(page_content=file_text, metadata={"source": "uploaded_file"})]
        file_splits = text_splitter.split_documents(file_docs)
        vector_store.add_documents(file_splits)
        return file_text
    except Exception as e:
        # Chain with `from e` so the original traceback/cause is not lost.
        raise RuntimeError(f"Error processing file: {str(e)}") from e
@app.route('/chat', methods=['POST'])
def chat():
user_message = request.form.get('message', '')
uploaded_file = request.files.get('file')
system_prompt = "You are an AI assistant developed by Holding Khalij Fars, tasked with responding to user queries accurately and helpfully And youre default language for answering is Farsi unless user wnts you to asnwe rin another language."
messages = [{"role": "system", "content": system_prompt}]
model = "qwen/qwen3-32b"
relevant_content = ""
if uploaded_file:
try:
file_text = process_file(uploaded_file)
if file_text:
search_query = user_message
retrieved_docs = vector_store.similarity_search(search_query, k=3)
relevant_content = "\n".join(doc.page_content for doc in retrieved_docs)
if relevant_content:
user_message += f"\nRelevant document content: {relevant_content}"
messages.append({"role": "user", "content": user_message})
except Exception as e:
messages.append({"role": "user", "content": f"Error processing file: {str(e)}. {user_message}"})
else:
messages.append({"role": "user", "content": user_message})
try:
chat_completion = client.chat.completions.create(
messages=messages,
model=model,
)
ai_response = chat_completion.choices[0].message.content
think_parts = re.findall(r'