Update README.md
Browse files
README.md
CHANGED
|
@@ -131,6 +131,55 @@ The Claude-augmented version demonstrates the highest correlation with human jud
|
|
| 131 |
|
| 132 |
ComeTH-Augmented outperforms direct evaluations from state-of-the-art LLMs, while being more computationally efficient for large-scale translation quality assessments.
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
## License
|
| 135 |
|
| 136 |
```
|
|
|
|
| 131 |
|
| 132 |
ComeTH-Augmented outperforms direct evaluations from state-of-the-art LLMs, while being more computationally efficient for large-scale translation quality assessments.
|
| 133 |
|
| 134 |
+
## Advanced Usage Examples
|
| 135 |
+
|
| 136 |
+
### Basic Evaluation
|
| 137 |
+
|
| 138 |
+
```python
|
| 139 |
+
from comet import download_model, load_from_checkpoint
|
| 140 |
+
model_path = download_model("wasanx/ComeTH")
|
| 141 |
+
model = load_from_checkpoint(model_path)
|
| 142 |
+
|
| 143 |
+
translations = [
|
| 144 |
+
{
|
| 145 |
+
"src": "This is an English source text.",
|
| 146 |
+
"mt": "นี่คือข้อความภาษาอังกฤษ",
|
| 147 |
+
}
|
| 148 |
+
]
|
| 149 |
+
results = model.predict(translations, batch_size=8, gpus=1)
|
| 150 |
+
scores = results['scores']
|
| 151 |
+
```
|
| 152 |
+
|
| 153 |
+
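### Batch Processing With Progress Tracking

This example reuses the `model` loaded in the Basic Evaluation example above and expects `translations.csv` to contain `src` and `mt` columns.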
|
| 154 |
+
|
| 155 |
+
```python
|
| 156 |
+
import pandas as pd
|
| 157 |
+
from tqdm import tqdm
|
| 158 |
+
|
| 159 |
+
df = pd.read_csv("translations.csv")
|
| 160 |
+
input_data = df[['src', 'mt']].to_dict('records')
|
| 161 |
+
|
| 162 |
+
batch_size = 32
|
| 163 |
+
all_scores = []
|
| 164 |
+
|
| 165 |
+
for i in tqdm(range(0, len(input_data), batch_size)):
|
| 166 |
+
batch = input_data[i:i+batch_size]
|
| 167 |
+
results = model.predict(batch, batch_size=len(batch), gpus=1)
|
| 168 |
+
all_scores.extend(results['scores'])
|
| 169 |
+
|
| 170 |
+
df['quality_score'] = all_scores
|
| 171 |
+
```
|
| 172 |
+
|
| 173 |
+
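### System-Level Evaluation

This example continues from the batch-processing example: it expects `df` to contain the `quality_score` column computed there, plus a `system_name` column to group the scores by.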
|
| 174 |
+
|
| 175 |
+
```python
|
| 176 |
+
import numpy as np
|
| 177 |
+
|
| 178 |
+
systems = df.groupby('system_name')['quality_score'].agg(['mean', 'std', 'count']).reset_index()
|
| 179 |
+
systems = systems.sort_values('mean', ascending=False)
|
| 180 |
+
print(systems)
|
| 181 |
+
```
|
| 182 |
+
|
| 183 |
## License
|
| 184 |
|
| 185 |
```
|