to number of word occurrences

c4168778 · Elias · 03786cc6 · c4168778 · c4168778
Commit c4168778 authored Sep 26, 2025 by Elias
Show whitespace changes
Inline Side-by-side

Showing with 58 additions and 0 deletions

README.md README.md +5 -0

TopMotsParBin.py TopMotsParBin.py +53 -0

No files found.
--- a/README.md
+++ b/README.md
@@ -70,6 +70,11 @@ python3 frequency_grouping.py
 ```bash
 python3 graphics.py
 ```
+**TopMotsParBin.py :** Bar chart showing, for each bin, the two words with the highest number of occurrences in the train set.
+```bash
+python3 TopMotsParBin.py
+```
 # Clustering methods

--- a/TopMotsParBin.py
+++ b/TopMotsParBin.py
+import csv
+from collections import defaultdict
+import matplotlib.pyplot as plt
+# Script: read a per-word CSV, keep the two words with the highest train count
+# in each bin, and show them as a bar chart.
+input_file = "res7_Random.csv"
+# Dictionary: bin -> list of (word, train count) tuples.
+# NOTE(review): column meanings (0 = word, 1 = train count, 4 = bin) are taken
+# from the variable names only — confirm against the CSV producer.
+bin_mots = defaultdict(list)
+# NOTE(review): no explicit encoding is passed, so the platform default is
+# used — confirm the CSV's encoding before relying on this on Windows.
+with open(input_file, mode="r", newline="") as f:
+    reader = csv.reader(f)
+    # Skip the first row (presumably the header); the default keeps an empty
+    # file from raising StopIteration.
+    next(reader, None)
+    for row in reader:
+        if not row:
+            # csv.reader yields an empty list for a blank line; nothing to parse.
+            continue
+        mot = row[0]
+        train = int(row[1])
+        bin_val = int(row[4])
+        # Only words with at least one train occurrence are kept.
+        if train >= 1:
+            bin_mots[bin_val].append((mot, train))
+# Keep the 2 words with the highest train count in each bin; a bin holding a
+# single word keeps just that word. sorted() is stable, so ties keep file order.
+top_mots_par_bin = {
+    bin_val: sorted(mots, key=lambda x: x[1], reverse=True)[:2]
+    for bin_val, mots in bin_mots.items()
+}
+# Walk the bins in ascending order to build the bar labels and heights.
+bins = sorted(top_mots_par_bin)
+labels = []
+values = []
+for bin_val in bins:
+    for mot, train_val in top_mots_par_bin[bin_val]:
+        labels.append(f"{mot}\n(Bin {bin_val})")
+        values.append(train_val)
+# Draw the bar chart.
+plt.figure(figsize=(12, 6))
+bars = plt.bar(labels, values, color='lightcoral')
+# Write each bar's value just above it (0.5 data units higher).
+for bar in bars:
+    height = bar.get_height()
+    plt.text(bar.get_x() + bar.get_width()/2, height + 0.5, str(height),
+             ha='center', va='bottom', fontsize=9)
+plt.title("Top words by Bin according to the train frequency")
+plt.ylabel("Occurrences in Train")
+plt.xticks(rotation=45, ha='right')
+plt.tight_layout()
+plt.grid(axis='y', linestyle='--', alpha=0.5)
+plt.show()