| import os |
| import asyncio |
| from dotenv import load_dotenv |
| import gradio as gr |
|
|
|
|
|
|
| |
| |
|
|
| |
from openai import OpenAI

# BUG FIX: DEEPINFRA_API_KEY (and the Weaviate credentials used by
# weaviate_client() below) were referenced but never defined, and
# load_dotenv was imported but never called. Load them from the
# environment / .env here so the module is actually runnable.
load_dotenv()
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")
WEAVIATE_URL = os.getenv("WEAVIATE_URL")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")

# DeepInfra exposes an OpenAI-compatible endpoint, so the stock OpenAI
# SDK works once pointed at their base URL.
openai = OpenAI(
    api_key=DEEPINFRA_API_KEY,
    base_url="https://api.deepinfra.com/v1/openai",
)
|
|
| |
| import weaviate |
| from weaviate.classes.init import Auth |
| from contextlib import contextmanager |
|
|
@contextmanager
def weaviate_client():
    """Context manager yielding a Weaviate Cloud connection.

    Guarantees the connection is closed on exit, even if the body raises.
    """
    conn = weaviate.connect_to_weaviate_cloud(
        cluster_url=WEAVIATE_URL,
        auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
    )
    try:
        yield conn
    finally:
        # Always release the connection, success or failure.
        conn.close()
|
|
| |
# Module-level state: path of the most recently ingested document (set by
# ingest_file); None until the first upload.
last_uploaded_path = None
|
|
| |
def embed_texts(texts: list[str], batch_size: int = 50) -> list[list[float]]:
    """Embed *texts* with Qwen3-Embedding-8B in batches of *batch_size*.

    A batch that fails to embed contributes one empty list per input text,
    so the output always has the same length as *texts*.
    """
    results: list[list[float]] = []
    for start in range(0, len(texts), batch_size):
        chunk = texts[start : start + batch_size]
        try:
            response = openai.embeddings.create(
                model="Qwen/Qwen3-Embedding-8B",
                input=chunk,
                encoding_format="float",
            )
        except Exception as exc:
            # Best-effort: log and pad with placeholders instead of raising.
            print(f"Embedding error: {exc}")
            results.extend([] for _ in chunk)
        else:
            results.extend(item.embedding for item in response.data)
    return results
|
|
def encode_query(query: str) -> list[float] | None:
    """Return the embedding vector for a single *query*, or None on failure."""
    vectors = embed_texts([query], batch_size=1)
    # embed_texts pads failed batches with empty lists; treat those as failure.
    return vectors[0] if vectors and vectors[0] else None
|
|
async def old_Document(query: str, top_k: int = 1) -> dict:
    """Retrieve up to *top_k* text chunks semantically near *query*.

    Searches the "Old_Documents" collection by vector similarity. Returns
    {"answer": [chunk, ...]}; the list is empty when the query cannot be
    embedded, nothing matches, or retrieval fails.
    """
    vector = encode_query(query)
    if not vector:
        return {"answer": []}

    try:
        with weaviate_client() as client:
            collection = client.collections.get("Old_Documents")
            result = collection.query.near_vector(
                near_vector=vector,
                limit=top_k,
                return_properties=["text"],
            )
            hits = getattr(result, "objects", None)
            if not hits:
                return {"answer": []}
            texts = [hit.properties.get("text", "[No Text]") for hit in hits]
            return {"answer": texts}
    except Exception as exc:
        # Best-effort: retrieval errors degrade to "no answer" rather than crash.
        print("RAG Error:", exc)
        return {"answer": []}
|
|
| |
def ingest_file(path: str) -> str:
    """Record *path* as the active document and return a status message."""
    global last_uploaded_path
    last_uploaded_path = path
    filename = os.path.basename(path)
    return f"Old document ingested: {filename}"
|
|
def answer_question(query: str) -> str:
    """Answer *query* from the ingested document, one bullet per chunk.

    Any failure (retrieval, event loop, formatting) is reported as a
    user-facing error string rather than raised.
    """
    try:
        retrieval = asyncio.run(old_Document(query))
        chunks = retrieval.get("answer", [])
        if chunks:
            return "\n".join(f"- {chunk}" for chunk in chunks)
        return "Sorry, I couldn't find relevant content in the old document."
    except Exception as exc:
        return f"Error processing your request: {exc}"
|
|
| |
with gr.Blocks(title="Old Documents RAG") as demo:
    gr.Markdown("## Old Documents RAG")
    query = gr.Textbox(placeholder="Your question...", lines=2, label="Ask about Old Documents")
    doc_file = gr.File(label="Upload Old Document (PDF, DOCX, TXT)")
    btn = gr.Button("Submit")
    out = gr.Textbox(label="Answer from Old Documents", lines=8, interactive=False)

    def process_old_doc(query, doc_file):
        """Ingest the uploaded file (if any), then answer the question.

        Falls back to the previously uploaded document when no new file is
        provided, and reports an error when there is nothing to search.
        """
        if doc_file:
            import shutil  # only needed on the upload path

            # BUG FIX: modern Gradio delivers gr.File values as a filepath
            # string (no .read()); older versions pass a tempfile wrapper
            # with a .name attribute. Handle both and copy the file instead
            # of reading bytes through the component object.
            src_path = getattr(doc_file, "name", doc_file)
            upload_dir = os.path.join(os.path.dirname(__file__), "uploaded_docs")
            os.makedirs(upload_dir, exist_ok=True)
            # basename strips any directory components from the client name.
            safe_filename = os.path.basename(src_path)
            save_path = os.path.join(upload_dir, safe_filename)
            shutil.copyfile(src_path, save_path)
            status = ingest_file(save_path)
            answer = answer_question(query)
            return f"{status}\n\n{answer}"

        # No new upload: reuse the last ingested document, if any.
        if last_uploaded_path:
            answer = answer_question(query)
            return f"[Using previously uploaded document: {os.path.basename(last_uploaded_path)}]\n\n{answer}"
        return "No document uploaded. Please upload an old document to proceed."

    btn.click(fn=process_old_doc, inputs=[query, doc_file], outputs=out)
|
|
# Script entry point: launch the Gradio app (debug=True surfaces tracebacks
# in the console and UI during development).
if __name__ == "__main__":
    demo.launch(debug=True)