Reza-galaxy21 committed on
Commit
889c29c
·
verified ·
1 Parent(s): e4b6514

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +15 -69
utils.py CHANGED
@@ -1,69 +1,15 @@
1
- # utils.py
2
-
3
- import os
4
- import fitz # PyMuPDF
5
- import faiss
6
- import numpy as np
7
- from openai import OpenAI
8
- import hashlib
9
-
10
- # تنظیمات API
11
- openai_api_key = os.getenv("API_Key")
12
-
13
- # Embedder
14
- def get_embedding(text, model="text-embedding-ada-002"):
15
- from openai import OpenAI
16
- client = OpenAI(api_key=openai_api_key)
17
- response = client.embeddings.create(input=[text], model=model)
18
- return response.data[0].embedding
19
-
20
- # استخراج متن از PDF و بردارسازی
21
- def extract_text_and_vectors(files):
22
- documents = []
23
- for file in files:
24
- doc = fitz.open(file.name)
25
- for page_num, page in enumerate(doc):
26
- text = page.get_text()
27
- if text.strip():
28
- vector = get_embedding(text)
29
- documents.append({
30
- "file_name": file.name,
31
- "page_num": page_num + 1,
32
- "text": text,
33
- "vector": np.array(vector).astype("float32"),
34
- })
35
- return documents
36
-
37
- # ساخت ایندکس FAISS
38
- def build_faiss_index(documents, dim=1536):
39
- index = faiss.IndexFlatL2(dim)
40
- vectors = [doc["vector"] for doc in documents]
41
- index.add(np.array(vectors))
42
- return index
43
-
44
- # جستجو در FAISS
45
- def search_similar_content(query, documents, index, k=3):
46
- query_vector = np.array(get_embedding(query)).astype("float32").reshape(1, -1)
47
- D, I = index.search(query_vector, k)
48
- results = [documents[i] for i in I[0]]
49
- return results
50
-
51
- # فرمت‌دهی پاسخ
52
- def format_response(results):
53
- formatted = []
54
- for r in results:
55
- snippet = r["text"][:500].strip().replace('\n', ' ')
56
- formatted.append(f"""📄 **{r['file_name']}** | صفحه {r['page_num']}\n{text_shorten(snippet)}\n""")
57
- return "\n---\n".join(formatted)
58
-
59
- # کمک‌تابع برای خلاصه کردن متن
60
- def text_shorten(text, max_chars=300):
61
- return text if len(text) <= max_chars else text[:max_chars] + "..."
62
-
63
- # لاگ نمونه‌ای از اسناد پردازش‌شده
64
- def log_debug_info(documents, max_samples=2):
65
- info = f"📦 مجموع اسناد پردازش‌شده: {len(documents)}\n\n"
66
- for i, doc in enumerate(documents[:max_samples]):
67
- info += f"📝 فایل: {doc['file_name']} | صفحه: {doc['page_num']}\n"
68
- info += f"متن نمونه: {text_shorten(doc['text'])}\n\n"
69
- return info
 
1
+ import json
2
+
3
+ def load_material_db(path="material_db.json"):
4
+ with open(path, "r", encoding="utf-8") as f:
5
+ return json.load(f)
6
+
7
+ def filter_items(materials, pole_height, pole_power, conductor_size):
8
+ matched = []
9
+ for item in materials:
10
+ cond = item["conditions"]
11
+ if (cond["pole_height"] in ["-", pole_height]) and \
12
+ (cond["pole_power"] in ["-", pole_power]) and \
13
+ (cond["conductor_size"] in ["-", conductor_size]):
14
+ matched.append(item)
15
+ return matched