"""Flask chat app: Groq LLM backend with RAG over uploaded PDF/TXT files."""
import os
import re

from flask import Flask, render_template_string, request, jsonify
from groq import Groq
from pypdf import PdfReader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter

app = Flask(__name__)
app.static_folder = 'static'

# SECURITY FIX: the API key was hard-coded (and is therefore leaked in source
# control). Read it from the environment; the old literal is kept only as a
# backward-compatible fallback — rotate that key and delete the fallback.
client = Groq(
    api_key=os.environ.get(
        "GROQ_API_KEY",
        "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",
    ),
)

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = Chroma(embedding_function=embeddings, collection_name="doc_collection")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chat_history = []  # currently unused; kept for backward compatibility

# NOTE(review): the HTML markup of this template appears to have been stripped
# by whatever tool flattened this file — only the text nodes survived. The
# original page markup needs to be restored from version control.
HTML_TEMPLATE = """ Chat

Hi User

☀️
How can I help you today?
"""


@app.route('/')
def index():
    """Serve the single-page chat UI."""
    return render_template_string(HTML_TEMPLATE)


def process_file(file_obj):
    """Extract text from an uploaded PDF or TXT file and index it for RAG.

    The extracted text is split into overlapping chunks and added to the
    module-level Chroma vector store for later similarity search.

    Args:
        file_obj: a Werkzeug ``FileStorage`` from ``request.files`` (or None).

    Returns:
        The full extracted text, or None when no file was given.

    Raises:
        RuntimeError: on any extraction or indexing failure (including an
            unsupported file extension).
    """
    if not file_obj:
        return None
    file_extension = os.path.splitext(file_obj.filename)[1].lower()
    try:
        if file_extension == ".pdf":
            reader = PdfReader(file_obj)
            # extract_text() may return None for image-only pages.
            file_text = "\n".join(page.extract_text() or "" for page in reader.pages)
        elif file_extension == ".txt":
            file_text = file_obj.read().decode('utf-8')
        else:
            raise ValueError(f"Unsupported file format: {file_extension}")
        file_docs = [Document(page_content=file_text, metadata={"source": "uploaded_file"})]
        file_splits = text_splitter.split_documents(file_docs)
        vector_store.add_documents(file_splits)
        return file_text
    except Exception as e:
        raise RuntimeError(f"Error processing file: {str(e)}")


@app.route('/chat', methods=['POST'])
def chat():
    """Handle one chat turn: optional file upload + user message -> LLM reply.

    Returns JSON ``{'thinking': ..., 'response': ...}`` where ``thinking`` is
    the model's <think>...</think> content and ``response`` the visible answer.
    """
    user_message = request.form.get('message', '')
    uploaded_file = request.files.get('file')
    # FIX: corrected the garbled typos in the original prompt text
    # ("youre", "wnts you to asnwe rin").
    system_prompt = (
        "You are an AI assistant developed by Holding Khalij Fars, tasked with "
        "responding to user queries accurately and helpfully. Your default "
        "language for answering is Farsi unless the user wants you to answer "
        "in another language."
    )
    messages = [{"role": "system", "content": system_prompt}]
    model = "qwen/qwen3-32b"

    if uploaded_file:
        try:
            file_text = process_file(uploaded_file)
            if file_text:
                # Retrieve the 3 chunks most similar to the user's question
                # and append them to the message as grounding context.
                retrieved_docs = vector_store.similarity_search(user_message, k=3)
                relevant_content = "\n".join(doc.page_content for doc in retrieved_docs)
                if relevant_content:
                    user_message += f"\nRelevant document content: {relevant_content}"
            messages.append({"role": "user", "content": user_message})
        except Exception as e:
            # Best-effort: forward the file error to the model along with the
            # user's question instead of failing the whole request.
            messages.append({
                "role": "user",
                "content": f"Error processing file: {str(e)}.\n{user_message}",
            })
    else:
        messages.append({"role": "user", "content": user_message})

    try:
        chat_completion = client.chat.completions.create(
            messages=messages,
            model=model,
        )
        ai_response = chat_completion.choices[0].message.content
        # FIX: the <think>/</think> tag literals were missing from these
        # regexes (apparently stripped by an HTML-sanitizing pass), so the
        # reasoning was never separated from the answer. Qwen3 models wrap
        # their reasoning in <think>...</think> blocks.
        think_parts = re.findall(r'<think>(.*?)</think>', ai_response, re.DOTALL)
        thinking = '\n'.join(think_parts).strip() if think_parts else ''
        final_response = re.sub(r'<think>.*?</think>', '', ai_response, flags=re.DOTALL).strip()
    except Exception as e:
        # FIX: the original swallowed the error and returned empty strings,
        # making API failures invisible to the user. Log and surface it.
        app.logger.exception("Groq chat completion failed")
        thinking = ''
        final_response = f"Error: {e}"
    return jsonify({'thinking': thinking, 'response': final_response})


if __name__ == '__main__':
    # NOTE(review): debug=True combined with host='0.0.0.0' exposes the
    # Werkzeug interactive debugger to the whole network — disable debug
    # before deploying.
    app.run(debug=True, port=7860, host='0.0.0.0')