1yahoo committed on
Commit
aa5b70d
·
verified ·
1 Parent(s): f6e331c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -131
app.py CHANGED
@@ -1,149 +1,47 @@
1
- import gradio as gr
 
2
  from openai import OpenAI
3
  import os
4
  import chromadb
5
- from chromadb.utils import embedding_functions
6
- import pypdf
7
- import uuid
8
 
9
- # --- الإعدادات الفنية ---
10
- STORAGE_PATH = "/data/neural_memory" if os.path.exists("/data") else "./neural_memory"
11
- chroma_client = chromadb.PersistentClient(path=STORAGE_PATH)
12
- default_ef = embedding_functions.DefaultEmbeddingFunction()
13
- collection = chroma_client.get_or_create_collection(name="advanced_brain", embedding_function=default_ef)
14
-
15
- # --- 1. الابتكار في الحقن (Semantic Ingestion) ---
16
def advanced_ingest(file_path):
    """Ingest a PDF or plain-text file into the Chroma collection.

    Splits the document into overlapping chunks, attaches source/page
    metadata to every chunk, and adds the batch to the vector store.

    Args:
        file_path: Path to a ``.pdf`` file or a UTF-8 text file.

    Returns:
        A human-readable status string (success with chunk count, or the
        failure reason).
    """
    try:
        filename = os.path.basename(file_path)

        if file_path.endswith('.pdf'):
            reader = pypdf.PdfReader(file_path)
            # Keep 1-based page numbers for the citation metadata.
            pages_data = [(p.extract_text(), i + 1) for i, p in enumerate(reader.pages)]
        else:
            with open(file_path, 'r', encoding='utf-8') as f:
                pages_data = [(f.read(), 1)]

        documents, metadatas, ids = [], [], []

        # Professional chunking settings: 20% overlap preserves continuity
        # of meaning across chunk boundaries.
        chunk_size = 1000
        overlap = 200

        for content, page_num in pages_data:
            # extract_text() may return None for image-only pages; simple
            # normalization: tabs -> spaces, trim edges.
            content = (content or "").replace('\t', ' ').strip()

            for i in range(0, len(content), chunk_size - overlap):
                chunk = content[i : i + chunk_size]
                documents.append(chunk)
                metadatas.append({
                    "source": filename,
                    "page": page_num,
                    "length": len(chunk)
                })
                ids.append(str(uuid.uuid4()))

        # Guard: chromadb rejects an empty add() batch.
        if documents:
            collection.add(documents=documents, metadatas=metadatas, ids=ids)
        # BUGFIX: the message previously interpolated the literal '(unknown)'
        # although `filename` was computed (and otherwise unused) above.
        return f"✅ تم حقن {len(documents)} قطعة معرفية من '{filename}' مع حفظ الميتا-داتا."
    except Exception as e:
        return f"❌ فشل الحقن: {str(e)}"
52
-
53
- # --- 2. الاسترجاع الذكي (Filtered Query) ---
54
def smart_query(user_query, threshold=0.6):
    """Retrieve knowledge chunks relevant to *user_query*.

    Over-fetches 10 candidates, then keeps only those whose similarity
    score (``1 - distance``) meets *threshold*. Each kept chunk is
    prefixed with its source/page citation.

    Returns:
        The kept chunks joined with separators, or a fallback message
        when nothing passes the threshold.
    """
    hits = collection.query(
        query_texts=[user_query],
        n_results=10,
        include=['documents', 'metadatas', 'distances']
    )

    # In ChromaDB a smaller distance means higher similarity (0 = exact),
    # so convert to a score via (1 - distance) before filtering.
    kept = [
        f"[المصدر: {meta['source']} | صفحة: {meta['page']}]\n{doc}"
        for doc, meta, dist in zip(
            hits['documents'][0], hits['metadatas'][0], hits['distances'][0]
        )
        if 1 - dist >= threshold
    ]

    if not kept:
        return "لم يتم العثور على معرفة وثيقة الصلة."
    return "\n\n---\n\n".join(kept)
73
-
74
- # --- 3. المحرك العصبي (The Engine) ---
75
def neural_engine(message, history, system_prompt, base_url, api_key, temp, score_threshold):
    """Stream a knowledge-grounded chat completion.

    Retrieves related context via ``smart_query``, folds it into the
    system prompt, replays the chat history, and yields the accumulated
    assistant reply after every streamed chunk so the UI can render
    progressively.
    """
    llm = OpenAI(
        base_url=base_url or "https://router.huggingface.co/hf-inference/v1",
        api_key=api_key or os.getenv("HF_TOKEN")
    )

    knowledge = smart_query(message, threshold=score_threshold)
    enhanced_system = f"{system_prompt}\n\n[سياق المعرفة الموثق]:\n{knowledge}"

    # Rebuild the conversation: system prompt first, then the alternating
    # user/assistant turns, ending with the new user message.
    messages = [{"role": "system", "content": enhanced_system}]
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    try:
        stream = llm.chat.completions.create(
            model="huihui-ai/Qwen2.5-72B-Instruct-abliterated",
            messages=messages,
            temperature=temp,
            stream=True
        )
        partial = ""
        for piece in stream:
            delta = piece.choices[0].delta.content
            if delta:
                partial += delta
                yield partial
    except Exception as e:
        yield f"⚠️ Neural Glitch: {str(e)}"
105
-
106
- # --- 4. واجهة المستخدم المتقدمة ---
107
# --- 4. Advanced user interface (Gradio Blocks) ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal"), css=".gradio-container {background: #f9f9f9}") as demo:
    gr.Markdown("# 🧬 Neural OS v4.0 (Semantic Edition)")

    with gr.Tabs():
        # Chat tab: chatbot pane plus input row.
        with gr.Tab("💬 Interaction Console"):
            chatbot = gr.Chatbot(height=600, show_label=False)
            with gr.Row():
                msg_input = gr.Textbox(placeholder="اسأل العقل الاصطناعي...", scale=8)
                submit_btn = gr.Button("نفاذ", variant="primary")

        # Ingestion tab: upload documents into the vector store.
        with gr.Tab("📚 Knowledge Vault"):
            with gr.Row():
                with gr.Column():
                    file_input = gr.File(label="وثائق التدريب (PDF/TXT)")
                    upload_btn = gr.Button("بدء المعالجة الدلالية", variant="secondary")
                with gr.Column():
                    status_log = gr.TextArea(label="سجل العمليات", interactive=False)

        # Settings tab: persona, retrieval threshold, endpoint credentials.
        with gr.Tab("⚙️ Control Panel"):
            with gr.Row():
                with gr.Column():
                    sys_p = gr.TextArea(label="System Persona", value="أنت محرك معرفي يستند إلى وثائق رسمية.")
                    score_th = gr.Slider(0.0, 1.0, 0.4, label="Relevance Threshold", info="كلما زاد، كان الاسترجاع أدق وأقل كمية.")
                with gr.Column():
                    endpoint = gr.Textbox(label="API Endpoint")
                    token = gr.Textbox(label="Access Token", type="password")
                    temp = gr.Slider(0, 1.5, 0.7, label="Temperature")

    # Event wiring: ingest every uploaded file, join the status lines.
    # NOTE(review): the lambda iterates `files` as a list — confirm the
    # gr.File component is configured to deliver a list here.
    upload_btn.click(lambda files: "\n".join([advanced_ingest(f.name) for f in files]), [file_input], [status_log])

    def chat_logic(m, h, sp, url, t, tmp, th):
        # Stream the engine's partial replies into the last chat row,
        # clearing the input box on each yield.
        gen = neural_engine(m, h, sp, url, t, tmp, th)
        h.append([m, ""])
        for res in gen:
            h[-1][1] = res
            yield "", h

    submit_btn.click(chat_logic, [msg_input, chatbot, sys_p, endpoint, token, temp, score_th], [msg_input, chatbot])
    msg_input.submit(chat_logic, [msg_input, chatbot, sys_p, endpoint, token, temp, score_th], [msg_input, chatbot])

if __name__ == "__main__":
    demo.queue().launch()
 
 
1
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
from openai import OpenAI
import os
import chromadb

app = FastAPI()

# Memory settings (same logic as the previous version of this app):
# a local persistent Chroma store holding the knowledge chunks.
STORAGE_PATH = "./neural_memory"
chroma_client = chromadb.PersistentClient(path=STORAGE_PATH)
collection = chroma_client.get_or_create_collection(name="advanced_brain_v6")

# Upstream LLM client; the access token is read from the environment.
client = OpenAI(
    base_url="https://router.huggingface.co/hf-inference/v1",
    api_key=os.getenv("HF_TOKEN")
)
18
+
19
@app.post("/v1/chat/completions")
async def chat_proxy(request: Request):
    """OpenAI-style chat endpoint that augments the request with RAG context.

    Reads the caller's ``messages``, retrieves the 3 most similar chunks
    from the Chroma collection using the last message as the query,
    prepends them as a system message, and proxies the upstream model's
    stream back to the caller as server-sent events.
    """
    data = await request.json()
    messages = data.get("messages", [])
    # Robustness: an empty `messages` list previously raised IndexError.
    user_query = messages[-1]["content"] if messages else ""

    # Search memory for related knowledge.
    results = collection.query(query_texts=[user_query], n_results=3)
    knowledge = "\n".join(results['documents'][0]) if results['documents'] else ""

    # Inject the retrieved knowledge as the first (system) message.
    messages.insert(0, {"role": "system", "content": f"Context: {knowledge}"})

    def stream_response():
        # NOTE(review): this synchronous call runs inside an async route and
        # blocks the event loop while streaming — consider a threadpool if
        # concurrency matters.
        response = client.chat.completions.create(
            model="huihui-ai/Qwen2.5-72B-Instruct-abliterated",
            messages=messages,
            stream=True
        )
        for chunk in response:
            delta = chunk.choices[0].delta.content
            if delta:
                # NOTE(review): the data field carries raw text, not JSON; a
                # delta containing a newline would break SSE framing — confirm
                # clients tolerate this before hardening.
                yield f"data: {delta}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(stream_response(), media_type="text/event-stream")
 
44
 
45
if __name__ == "__main__":
    # Local entry point: serve the FastAPI app on all interfaces, port 8000.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)