TestTraining

Paused

Noor22Tak committed on Mar 24, 2025

Commit

f7678e6

verified ·

1 Parent(s): 1849627

Create train.py

Files changed (1) hide show

train.py ADDED Viewed

+import os
+
+import faiss
+import numpy as np
+import pandas as pd
+import requests
+# Load dataset
+df = pd.read_csv('/app/news_dataset.csv')
+# Function to create textual representation
+def create_textual_representation(row):
+    return f"""
+    الكاتب: {row['writer']},
+    الموقع: {row['location']},
+    التاريخ: {row['date']},
+    الوقت: {row['time']},
+    العنوان: {row['title']},
+    الخبر: {row['news']}
+    """
+df['textual_representation'] = df.apply(create_textual_representation, axis=1)
+# FAISS setup
+dim = 4096
+index = faiss.IndexFlatL2(dim)
+x = np.zeros((len(df), dim), dtype='float32')
+# Generate embeddings using Llama 3.1 on Hugging Face API
+HF_API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B"
+HEADERS = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
+for i, text in enumerate(df['textual_representation']):
+    print(f'Processing {i} instance')
+    response = requests.post(HF_API_URL, headers=HEADERS, json={"inputs": text})
+    embedding = response.json()[0]['embedding']
+    x[i] = np.array(embedding)
+# Add embeddings to FAISS index
+index.add(x)
+# Save FAISS index
+faiss.write_index(index, "/app/arabic_news_index")