# app.py — Gradio RAG chat over French PDF documents (Hugging Face Spaces demo).
import gradio as gr
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch
import os
from data_cutter import create_db
# Constants
CHROMA_PATH = "chroma_db"          # on-disk Chroma vector store location
MODEL_ID = "google/flan-t5-small"  # small seq2seq model; better for French Q&A

print("🚀 Starting app...")

# 1) Initialize / load the vector database.
# Prefer rebuilding from the data folder; fall back to an existing on-disk
# store only if the rebuild fails and one is already present.
print("📂 Initializing database from data folder...")
try:
    vectorstore = create_db()
    print("✅ Database created successfully!")
except Exception as e:
    print(f"❌ Error creating database: {e}")
    if os.path.exists(CHROMA_PATH):
        print("⚠️ Attempting to load existing database...")
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vectorstore = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
    else:
        # Bare `raise` re-raises the active exception with its traceback intact
        # (plain `raise e` resets the traceback origin).
        raise

# 2) Load the LLM.
print(f"🤖 Loading AI Model ({MODEL_ID})...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # full precision; target device is CPU
    low_cpu_mem_usage=True,
)
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=300,
    device=-1,  # CPU
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)
print("✅ AI Model loaded successfully!")
# 3οΈβ£ Chat function
# 3) Chat function
def chat_function(message, history):
    """Answer *message* with RAG over the vector store.

    Retrieves the top-3 most similar chunks, builds a French-language prompt
    for Flan-T5, and returns the generated answer. On any failure, returns a
    French error message instead of raising (Gradio displays the return value).
    *history* is the Gradio chat history and is not used here.
    """
    print(f"📨 Question received: {message}")
    try:
        # Search for the most relevant chunks.
        results = vectorstore.similarity_search(message, k=3)
        context = "\n\n".join(doc.page_content for doc in results)

        # Prompt optimized for Flan-T5 (instructions in French).
        prompt = f"""Contexte du document:
{context}

Question: {message}

Répondez en français en vous basant uniquement sur le contexte ci-dessus. Si l'information n'est pas dans le contexte, dites "Je ne trouve pas cette information dans le document"."""

        # Generate the response.
        outputs = pipe(prompt, max_new_tokens=300, num_return_sequences=1)
        response = outputs[0]['generated_text'].strip()

        # Fallback when the model produces an empty or too-short answer.
        if len(response) < 10:
            response = "Je n'ai pas trouvé d'information pertinente dans le document pour répondre à votre question."

        print(f"✅ Response generated: {response[:100]}...")
        return response
    except Exception as e:
        error_msg = f"Erreur lors de la génération de la réponse: {str(e)}"
        print(f"❌ {error_msg}")
        return error_msg
# 4) Gradio interface — a simple chat UI wired to chat_function.
demo = gr.ChatInterface(
    fn=chat_function,
    title="💬 RAG Chat - Documents en Français",
    description=f"Posez des questions sur vos documents PDF en français. Propulsé par {MODEL_ID}.",
    examples=[
        "Quel est le sujet principal du document ?",
        "Résume le contenu principal.",
        "Quelles sont les informations importantes ?",
    ],
)

if __name__ == "__main__":
    demo.launch()