File size: 6,099 Bytes
80b75c5
 
d20c2f3
b7610d0
bc0238f
39e3df4
 
da9b3be
a5e9d9d
107a8b5
f6859b1
9423662
b5406cb
9423662
 
a5e9d9d
b5406cb
 
f6859b1
 
 
 
 
 
b5406cb
 
 
 
 
 
 
 
 
d20c2f3
 
 
29ab5d0
d20c2f3
f6859b1
80b75c5
d20c2f3
 
29ab5d0
d20c2f3
 
 
 
 
 
 
 
 
 
 
 
 
c88b61d
ba9d15f
da9b3be
 
f6859b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5406cb
f6859b1
 
 
 
4458366
b5406cb
4458366
 
 
 
 
 
 
692a3ab
 
 
4458366
 
61f8be2
4458366
d20c2f3
692a3ab
 
4458366
d20c2f3
80b75c5
a5e9d9d
 
9423662
 
 
692a3ab
 
 
 
 
9423662
 
b5406cb
9423662
 
 
b5406cb
 
 
 
 
 
9423662
b7610d0
9423662
b5406cb
 
 
 
 
d20c2f3
9f1e7e8
 
9423662
 
 
d20c2f3
f6859b1
 
b5406cb
da9b3be
f6859b1
 
 
 
b5406cb
f6859b1
 
 
 
 
 
 
 
 
 
b8954f7
d20c2f3
 
b5406cb
 
f6859b1
d20c2f3
 
b5406cb
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import gradio as gr
import os
import re
import requests
import numpy as np
import torch
from sklearn.neighbors import NearestNeighbors
from sentence_transformers import SentenceTransformer
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# --- CONFIGURATION ---
# Hugging Face Inference API credentials and endpoint.
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()  # read from env; empty string when unset
HF_MODEL = "HuggingFaceH4/zephyr-7b-beta"  # Change this if needed
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
headers = {"Authorization": f"Bearer {HF_TOKEN}"}

# Lecture-note source files read into the knowledge base, and the
# sentence-transformers model id used to embed them.
FILES = ["main1.txt", "main2.txt", "main3.txt", "main4.txt", "main5.txt", "main6.txt"]
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# On-disk cache locations for precomputed embeddings and text chunks.
EMBEDDING_CACHE_FILE = "embeddings.npy"
CHUNKS_CACHE_FILE = "chunks.npy"

# --- FUNCTIONS ---

def test_model_connection():
    """Probe the HF Inference API endpoint and log status + body (best effort).

    Any failure (network, timeout, non-JSON body) is caught and logged;
    the function never raises.
    """
    try:
        print("🔍 Testing Hugging Face model availability...")
        probe = requests.get(HF_API_URL, headers=headers, timeout=10)
        print("Status Code:", probe.status_code)
        print("Response JSON:", probe.json())
    except Exception as err:
        print("❌ Connection Test Failed:", err)

def load_text_files(file_list):
    """Read every file in *file_list* and return their contents joined by newlines.

    Unreadable files are logged and skipped (best effort); the result is
    stripped of leading/trailing whitespace. Returns "" for an empty or
    fully-unreadable list.
    """
    parts = []
    for path in file_list:
        try:
            with open(path, "r", encoding="utf-8") as handle:
                parts.append(handle.read())
        except Exception as e:
            print(f"Error reading {path}: {e}")
    return "\n".join(parts).strip()

def chunk_text(text, max_chunk_length=500):
    """Split *text* into sentence-aligned chunks of roughly max_chunk_length chars.

    Sentences are split on whitespace that follows '.', '!' or '?'. Sentences
    are greedily packed into a chunk until the length budget is exceeded; a
    single sentence longer than the budget becomes its own (oversized) chunk.

    Fixes over the previous version: empty input now yields [] (not [""]),
    and an oversized first sentence no longer emits a leading empty chunk
    (the old code unconditionally appended current_chunk.strip() in the
    else branch even when current_chunk was empty).
    """
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if not sentence:
            continue  # re.split yields "" for empty input; skip it
        if len(current_chunk) + len(sentence) <= max_chunk_length:
            current_chunk += " " + sentence
        else:
            if current_chunk:  # never emit an empty chunk
                chunks.append(current_chunk.strip())
            current_chunk = sentence
    if current_chunk.strip():
        chunks.append(current_chunk.strip())
    return chunks

def embed_texts(texts):
    """Encode *texts* via the module-level SentenceTransformer into
    L2-normalized numpy vectors."""
    vectors = model.encode(
        texts,
        convert_to_numpy=True,
        normalize_embeddings=True,
    )
    return vectors

def save_cache(embeddings, chunks):
    """Persist the embedding matrix and its chunk texts to the on-disk
    cache files (paths come from module-level constants)."""
    np.save(CHUNKS_CACHE_FILE, np.array(chunks))
    np.save(EMBEDDING_CACHE_FILE, embeddings)

def load_cache():
    """Return (embeddings, chunks) from the on-disk cache, or (None, None)
    when either cache file is missing."""
    cache_present = (
        os.path.exists(EMBEDDING_CACHE_FILE)
        and os.path.exists(CHUNKS_CACHE_FILE)
    )
    if not cache_present:
        return None, None
    embeddings = np.load(EMBEDDING_CACHE_FILE, allow_pickle=True)
    chunks = np.load(CHUNKS_CACHE_FILE, allow_pickle=True).tolist()
    print("✅ Loaded cached embeddings and chunks.")
    return embeddings, chunks

def retrieve_chunks(query, top_k=5):
    """Return the *top_k* cached text chunks nearest to *query* under the
    cosine metric of the module-level NearestNeighbors index."""
    query_vec = embed_texts([query])
    _, neighbor_idx = nn_model.kneighbors(query_vec, n_neighbors=top_k)
    return [chunks[idx] for idx in neighbor_idx[0]]

def build_prompt(question):
    """Assemble the RAG prompt: system instruction, retrieved lecture
    context, then the user's question."""
    context = "\n".join(retrieve_chunks(question))

    system_instruction = """You are an AI-supported financial expert. Your role is to answer questions strictly within the context of the university lecture "Financial Markets" (Universität Duisburg-Essen).
Important instructions:
1. Base your answers primarily on the provided lecture excerpts ("lecture_slides").
2. If an answer is not directly covered by the lecture content, you may elaborate — but **only if you are absolutely certain**. Avoid making up information.
3. If you are unsure, reply politely: 
   "Entschuldigung. Leider kenne ich die Antwort auf diese Frage nicht."
4. If a formula is relevant, show the **exact formula** and explain it in **simple terms**.
5. Do not give vague or speculative answers — it's better to skip a question than guess.
6. **Never generate your own questions. Only respond to the given question.**
7. **Always respond in German.**
8. Make your answers clear, fact-based, and well-structured.
"""

    return f"""{system_instruction}
Vorlesungsinhalte:
{context}
--- Ende der Vorlesungsinhalte ---
Frage des Nutzers (bitte nur diese beantworten): {question}
Antwort:"""

def respond(message, history):
    """Gradio chat handler: build a RAG prompt for *message*, query the HF
    Inference API, and return the generated answer string.

    All failures are converted into a user-facing error string; the
    function never raises into Gradio.
    """
    # Bind up front so the except branch can safely test it; the old code
    # referenced a possibly-unbound `response` guarded only by a bare `except:`.
    response = None
    try:
        prompt = build_prompt(message)
        payload = {
            "inputs": prompt,
            "parameters": {
                "temperature": 0.2,
                "max_new_tokens": 400,
                "stop": ["Frage:", "Question:", "User:", "Frage des Nutzers"],
            },
        }

        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        output = response.json()

        # Expected shape: [{"generated_text": "<prompt + completion>"}].
        # Also guard against an empty list, which previously raised IndexError.
        if isinstance(output, list) and output and "generated_text" in output[0]:
            generated_text = output[0]["generated_text"]
            # The API echoes the prompt; strip it to keep only the completion.
            answer = generated_text[len(prompt):].strip()
        else:
            print("❗️HF API returned unexpected format:", output)
            answer = "❌ Modell hat keine gültige Antwort geliefert. Bitte später erneut versuchen."

    except Exception as e:
        print("API Error:", e)
        if response is not None:
            try:
                print("Raw HF response:", response.text)
            except Exception:
                pass  # best effort: logging the raw body must not mask the real error
        answer = "❌ Error contacting the model. Please check your token, timeout, or model availability."

    if history is None:
        history = []

    # NOTE(review): gr.ChatInterface manages history itself; this in-place
    # append looks redundant but is kept for backward compatibility.
    history.append({"role": "assistant", "content": answer})
    return answer

# --- INIT SECTION ---
# Module-level setup: load the embedding model, then obtain chunk embeddings
# (from cache if present, otherwise recomputed) and build the NN index.

print("🔄 Initializing embedding model...")
model = SentenceTransformer(EMBEDDING_MODEL)

# Try the on-disk cache first; fall back to re-reading and re-embedding
# the lecture text files.
chunk_embeddings, chunks = load_cache()

if chunk_embeddings is None or chunks is None:
    print("🛠 No cache found. Processing text...")
    knowledge_base = load_text_files(FILES)
    chunks = chunk_text(knowledge_base)
    chunk_embeddings = embed_texts(chunks)
    save_cache(chunk_embeddings, chunks)
    print("✅ Embeddings and chunks cached.")

# Cosine-distance nearest-neighbor index over all chunk embeddings,
# queried by retrieve_chunks().
nn_model = NearestNeighbors(metric="cosine")
nn_model.fit(chunk_embeddings)

# --- GRADIO INTERFACE ---
# Chat UI wired to respond(); the chatbot uses the "messages" (role/content
# dict) history format.

demo = gr.ChatInterface(
    fn=respond,
    title="📚 RAG Chatbot Finanzmärkte",
    description="Stelle Fragen basierend auf den hochgeladenen Vorlesungstexten.",
    chatbot=gr.Chatbot(type="messages"),
)

if __name__ == "__main__":
    # Probe the HF endpoint first so connectivity problems show in the log,
    # then start the Gradio server.
    test_model_connection()
    demo.launch(debug=True)