Commit c4168778 authored by Elias's avatar Elias

to number of word occurrences

parent 03786cc6
......@@ -70,6 +70,11 @@ python3 frequency_grouping.py
```bash
python3 graphics.py
```
**TopMotsParBin.py:** Bar chart showing, for each bin, the most frequent words (up to two per bin) together with their number of occurrences in the train data.
```bash
python3 TopMotsParBin.py
```
# Clustering methods
......
import csv
from collections import defaultdict
import matplotlib.pyplot as plt
input_file = "res7_Random.csv"

# Number of most frequent words kept for each bin.
TOP_N = 2


def group_words_by_bin(rows):
    """Group (word, train_count) pairs by bin.

    Args:
        rows: iterable of CSV data rows (header already consumed). Column 0
            is read as the word, column 1 as the train count and column 4
            as the bin; columns 1 and 4 must parse as integers.

    Returns:
        A mapping ``bin -> [(word, train_count), ...]`` holding only the
        words whose train count is at least 1, in input order.

    Raises:
        IndexError: if a non-blank row has fewer than 5 columns.
        ValueError: if the train count or the bin is not an integer.
    """
    bin_mots = defaultdict(list)
    for row in rows:
        # csv.reader yields [] for blank lines; they carry no data.
        if not row:
            continue
        mot = row[0]
        train = int(row[1])
        bin_val = int(row[4])
        if train >= 1:
            bin_mots[bin_val].append((mot, train))
    return bin_mots


def top_words_per_bin(bin_mots, top_n=TOP_N):
    """Keep the ``top_n`` words with the highest train count in each bin.

    A bin holding fewer than ``top_n`` words keeps all of them. The sort is
    stable, so words with equal counts stay in their input order.
    """
    return {
        bin_val: sorted(mots, key=lambda item: item[1], reverse=True)[:top_n]
        for bin_val, mots in bin_mots.items()
    }


def bar_series(top_mots_par_bin):
    """Flatten the per-bin selection into parallel label/value lists.

    Bins are visited in ascending order; each label carries the word and
    the bin it belongs to.
    """
    labels = []
    values = []
    for bin_val in sorted(top_mots_par_bin):
        for mot, train_val in top_mots_par_bin[bin_val]:
            labels.append(f"{mot}\n(Bin {bin_val})")
            values.append(train_val)
    return labels, values


def plot_top_words(labels, values):
    """Draw the bar chart and display it in a window."""
    plt.figure(figsize=(12, 6))
    bars = plt.bar(labels, values, color='lightcoral')

    # Write each bar's value just above it.
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2, height + 0.5, str(height),
                 ha='center', va='bottom', fontsize=9)

    plt.title("Top words by Bin according to the train frequency")
    plt.ylabel("Occurrences in Train")
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.grid(axis='y', linestyle='--', alpha=0.5)
    plt.show()


def main():
    """Read ``input_file``, select the top words of each bin and plot them."""
    # Explicit encoding so decoding does not depend on the platform locale.
    # NOTE(review): assumes the CSV is written as UTF-8 - confirm against
    # the script that produces it.
    with open(input_file, mode="r", newline="", encoding="utf-8") as f:
        reader = csv.reader(f)
        # Skip the header line; the default avoids StopIteration on an
        # empty file.
        next(reader, None)
        bin_mots = group_words_by_bin(reader)

    labels, values = bar_series(top_words_per_bin(bin_mots))
    plot_top_words(labels, values)


if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment