File size: 3,109 Bytes
0472254
f516652
0472254
38854c4
0472254
 
 
 
 
 
fc8e15c
0472254
 
 
6e52b29
0472254
 
 
 
 
 
 
 
6e52b29
0472254
 
 
 
6e52b29
0472254
6e52b29
38854c4
 
 
 
 
0472254
6e52b29
38854c4
6e52b29
 
fc8e15c
38854c4
6e52b29
 
 
 
 
 
 
0472254
38854c4
0472254
38854c4
 
fc8e15c
38854c4
 
fc8e15c
 
6e52b29
 
fc8e15c
46f5338
fc8e15c
38854c4
 
fc8e15c
38854c4
 
fc8e15c
 
 
 
38854c4
 
 
 
fc8e15c
38854c4
 
6e52b29
0472254
6e52b29
0472254
 
fc8e15c
 
 
 
 
 
80c4aa4
0472254
 
 
fc8e15c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import gradio as gr
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch
import os
from data_cutter import create_db

# Constants
CHROMA_PATH = "chroma_db"
MODEL_ID = "google/flan-t5-small"  # Better for French Q&A

print("🚀 Starting app...")

# 1️⃣ Initialize / Load Database
# Build the vector store from the data folder; if that fails and a persisted
# Chroma DB already exists on disk, fall back to loading it instead.
print("🔄 Initializing database from data folder...")
try:
    vectorstore = create_db()
    print("✅ Database created successfully!")
except Exception as e:
    print(f"❌ Error creating database: {e}")
    if os.path.exists(CHROMA_PATH):
        print("⚠️ Attempting to load existing database...")
        # NOTE(review): this embedding model must match the one used when the
        # persisted DB was built — presumably what create_db() uses; confirm.
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vectorstore = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
    else:
        # Bare `raise` re-raises the active exception; `raise e` would add
        # this frame to the traceback origin for no benefit.
        raise

# 2️⃣ Load LLM
print(f"🤖 Loading AI Model ({MODEL_ID})...")

# Tokenizer and seq2seq weights for the Flan-T5 checkpoint, kept in float32
# so no GPU is required.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)

# Sampling-based generation settings shared by every chat request.
_generation_settings = {
    "max_new_tokens": 300,
    "device": -1,  # CPU
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.9,
}
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    **_generation_settings,
)
print("✅ AI Model loaded successfully!")

# 3️⃣ Chat function
def chat_function(message, history):
    """Answer a user question with RAG: retrieve the top document chunks,
    then ask the Flan-T5 pipeline to answer in French from that context.

    Args:
        message: The user's question (str).
        history: Prior chat turns supplied by gr.ChatInterface (unused).

    Returns:
        str: The model's answer in French, a fallback notice when no
        relevant information was found, or an error message if retrieval
        or generation raised.
    """
    print(f"📨 Question received: {message}")

    fallback = ("Je n'ai pas trouvé d'information pertinente dans le document "
                "pour répondre à votre question.")

    try:
        # Search for relevant chunks
        results = vectorstore.similarity_search(message, k=3)
        if not results:
            # Nothing retrieved — skip generation entirely instead of
            # prompting the model with an empty context.
            print("⚠️ No relevant chunks found.")
            return fallback
        context = "\n\n".join(doc.page_content for doc in results)

        # Build prompt optimized for Flan-T5
        prompt = f"""Contexte du document:
{context}

Question: {message}

Répondez en français en vous basant uniquement sur le contexte ci-dessus. Si l'information n'est pas dans le contexte, dites "Je ne trouve pas cette information dans le document"."""

        # Generate response (max_new_tokens/sampling are configured on the
        # pipeline itself, so no per-call overrides are needed).
        outputs = pipe(prompt)
        response = outputs[0]['generated_text'].strip()

        # Fallback if response is too short or empty
        if len(response) < 10:
            response = fallback

        print(f"✅ Response generated: {response[:100]}...")
        return response

    except Exception as e:
        # Best-effort boundary: surface the error in the chat instead of
        # crashing the Gradio handler.
        error_msg = f"Erreur lors de la génération de la réponse: {str(e)}"
        print(f"❌ {error_msg}")
        return error_msg


# 4️⃣ Gradio Interface
# Canned questions shown under the chat box as one-click starters.
_EXAMPLE_QUESTIONS = [
    "Quel est le sujet principal du document ?",
    "Résume le contenu principal.",
    "Quelles sont les informations importantes ?",
]

demo = gr.ChatInterface(
    fn=chat_function,
    title="💬 RAG Chat - Documents en Français",
    description=f"Posez des questions sur vos documents PDF en français. Propulsé par {MODEL_ID}.",
    examples=_EXAMPLE_QUESTIONS,
)

# Launch the web UI only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()