Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -1,69 +1,15 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
client = OpenAI(api_key=openai_api_key)
|
| 17 |
-
response = client.embeddings.create(input=[text], model=model)
|
| 18 |
-
return response.data[0].embedding
|
| 19 |
-
|
| 20 |
-
# Extract text from PDFs and vectorize it
def extract_text_and_vectors(files):
    """Extract the text of every page of the given PDF files and embed it.

    Args:
        files: iterable of objects with a ``name`` attribute holding a PDF
            path (presumably Gradio upload objects — TODO confirm caller).

    Returns:
        list of dicts with keys ``file_name``, ``page_num`` (1-based),
        ``text`` and ``vector`` (float32 numpy array).
    """
    documents = []
    for file in files:
        doc = fitz.open(file.name)
        try:
            for page_num, page in enumerate(doc):
                text = page.get_text()
                # Skip blank pages: embedding empty text wastes API calls.
                if text.strip():
                    vector = get_embedding(text)
                    documents.append({
                        "file_name": file.name,
                        "page_num": page_num + 1,
                        "text": text,
                        "vector": np.array(vector).astype("float32"),
                    })
        finally:
            # Fix: the original never closed the PDF, leaking the file handle.
            doc.close()
    return documents
|
| 36 |
-
|
| 37 |
-
# Build the FAISS index
def build_faiss_index(documents, dim=1536):
    """Build a flat L2 FAISS index over the documents' embedding vectors.

    Args:
        documents: list of dicts carrying a float32 ``vector`` of length *dim*.
        dim: embedding dimensionality (1536 matches OpenAI text-embedding-ada-002).

    Returns:
        a ``faiss.IndexFlatL2`` containing one vector per document
        (empty but valid when *documents* is empty).
    """
    index = faiss.IndexFlatL2(dim)
    # Fix: the original crashed on an empty document list, because
    # np.array([]) has shape (0,) which faiss cannot add.
    if documents:
        vectors = np.stack([doc["vector"] for doc in documents])
        index.add(vectors)
    return index
|
| 43 |
-
|
| 44 |
-
# Search in FAISS
def search_similar_content(query, documents, index, k=3):
    """Return up to *k* documents most similar to *query* (closest first).

    Args:
        query: natural-language query string to embed and search with.
        documents: the list used to build *index* (positions must match).
        index: a FAISS index over the document vectors.
        k: maximum number of hits to return.

    Returns:
        list of matching document dicts, possibly shorter than *k*.
    """
    query_vector = np.array(get_embedding(query)).astype("float32").reshape(1, -1)
    distances, indices = index.search(query_vector, k)
    # Fix: FAISS pads the result with -1 when the index holds fewer than k
    # vectors; the original's documents[-1] silently returned the LAST doc.
    return [documents[i] for i in indices[0] if i != -1]
|
| 50 |
-
|
| 51 |
-
# Format the response
def format_response(results):
    """Render search results as one Markdown string, separated by rules.

    Each result contributes a header line (file name + page number) followed
    by a shortened, single-line snippet of its page text.
    """
    sections = []
    for result in results:
        # Collapse the page text to a single line and cap it at 500 chars
        # before shortening further.
        snippet = result["text"][:500].strip().replace('\n', ' ')
        header = f"📄 **{result['file_name']}** | صفحه {result['page_num']}"
        sections.append(f"""{header}\n{text_shorten(snippet)}\n""")
    return "\n---\n".join(sections)
|
| 58 |
-
|
| 59 |
-
# Helper to shorten text
def text_shorten(text, max_chars=300):
    """Truncate *text* to at most *max_chars* characters, adding "..." when cut."""
    if len(text) <= max_chars:
        return text
    return text[:max_chars] + "..."
|
| 62 |
-
|
| 63 |
-
# Log a sample of the processed documents
def log_debug_info(documents, max_samples=2):
    """Return a human-readable summary of the processed documents.

    Args:
        documents: list of dicts with ``file_name``, ``page_num`` and ``text``.
        max_samples: how many documents to preview in detail.

    Returns:
        a multi-line string: a total count followed by up to *max_samples*
        shortened document previews.
    """
    info = f"📦 مجموع اسناد پردازششده: {len(documents)}\n\n"
    # Idiom fix: the original used enumerate() but never used the index.
    for doc in documents[:max_samples]:
        info += f"📝 فایل: {doc['file_name']} | صفحه: {doc['page_num']}\n"
        info += f"متن نمونه: {text_shorten(doc['text'])}\n\n"
    return info
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
def load_material_db(path="material_db.json"):
    """Load the material database from a UTF-8 encoded JSON file.

    Args:
        path: filesystem path of the JSON database.

    Returns:
        the parsed JSON value (typically a list of material items).
    """
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)
|
| 6 |
+
|
| 7 |
+
def filter_items(materials, pole_height, pole_power, conductor_size):
    """Select material items whose conditions match the given pole spec.

    A condition value of "-" acts as a wildcard matching any requested value.

    Args:
        materials: iterable of items, each carrying a ``conditions`` dict
            with keys ``pole_height``, ``pole_power`` and ``conductor_size``.
        pole_height: requested pole height to match.
        pole_power: requested pole power to match.
        conductor_size: requested conductor size to match.

    Returns:
        list of the items whose every condition is "-" or equals the request.
    """
    def matches(cond):
        return (cond["pole_height"] in ["-", pole_height]
                and cond["pole_power"] in ["-", pole_power]
                and cond["conductor_size"] in ["-", conductor_size])

    return [item for item in materials if matches(item["conditions"])]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|