File size: 2,995 Bytes
0472254
 
 
 
 
 
 
 
 
 
e857a6a
0472254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd5e632
0472254
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import gradio as gr
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os
from data_cutter import create_db

# Constants
CHROMA_PATH = "chroma_db"
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"

print("🚀 Starting app...")

# 1. Initialize/Load Database
print("🔄 Initializing database from data folder...")
# Rebuild the vector store on every startup so it always reflects the data folder.
try:
    vectorstore = create_db()
except Exception as exc:
    print(f"❌ Error creating database: {exc}")
    # Fallback path: reopen a previously persisted store, if one is on disk.
    if not os.path.exists(CHROMA_PATH):
        raise exc
    print("⚠️ Attempting to load existing database...")
    embedding_fn = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_fn)
else:
    print("✅ Database created successfully!")

# 2. Load AI Model
print(f"🤖 Loading AI Model ({MODEL_ID})...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

    # CPU-only text-generation pipeline with nucleus sampling.
    _GENERATION_OPTS = {
        "max_new_tokens": 512,
        "device": -1,  # Run on CPU
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
    }
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        **_GENERATION_OPTS,
    )
    print("✅ AI Model loaded successfully!")
except Exception as exc:
    print(f"❌ Error loading model: {exc}")
    raise exc

def chat_function(message, history):
    """Answer *message* with RAG: retrieve context, then generate with the local LLM.

    Parameters
    ----------
    message : str
        The user's question, as typed into the Gradio chat box.
    history : list
        Prior chat turns supplied by ``gr.ChatInterface`` (currently unused —
        each query is answered statelessly from the retrieved context).

    Returns
    -------
    str
        The model's answer, grounded only in the retrieved documents.
    """
    print(f"📨 Received query: {message}")

    # Retrieve the 3 most relevant document chunks for the query.
    results = vectorstore.similarity_search(message, k=3)
    context = "\n\n".join(doc.page_content for doc in results)

    # Build a chat prompt that restricts the model to the retrieved context.
    messages = [
        {"role": "system", "content": "You are a helpful assistant. Answer the user's question based ONLY on the provided context. If the answer is not in the context, say you don't know."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {message}"}
    ]

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Generate; the pipeline returns the prompt followed by the completion.
    outputs = pipe(prompt)
    generated_text = outputs[0]['generated_text']

    # Extract only the assistant's reply from the full generated text.
    if "<|im_start|>assistant" in generated_text:
        response_text = generated_text.split("<|im_start|>assistant")[-1].strip()
    elif prompt in generated_text:
        response_text = generated_text.replace(prompt, "").strip()
    else:
        response_text = generated_text

    # Bug fix: Qwen's ChatML output typically ends with an <|im_end|>
    # end-of-turn marker that the split above leaves attached; strip it so
    # raw special tokens are never shown to the user.
    response_text = response_text.replace("<|im_end|>", "").strip()

    return response_text

# Create Gradio Interface
# Canned starter questions shown beneath the chat box.
_EXAMPLE_QUERIES = ["What is the main topic?", "Summarize the content."]

demo = gr.ChatInterface(
    chat_function,
    examples=_EXAMPLE_QUERIES,
    title="RAG Chat with Your Data",
    description=f"Ask questions about your documents. Powered by {MODEL_ID}.",
)

if __name__ == "__main__":
    demo.launch()