script added

31dc3aa4 · Elias · d18626fe · 31dc3aa4
Commit 31dc3aa4 authored Jun 26, 2025 by Elias
Hide whitespace changes
Inline Side-by-side

Showing with 87 additions and 0 deletions

TrainValTest3_bis_Random.py TrainValTest3_bis_Random.py +87 -0

No files found.
--- a/TrainValTest3_bis_Random.py
+++ b/TrainValTest3_bis_Random.py
+import re
+import ast
+import os
+from pympi import Praat
+
+dossier_textgrid = 'textGrid'
+fichier_txt = 'resTrainValTest2_Random_backup.txt'
+output_mots = set()
+
+with open(fichier_txt, 'r', encoding='utf-8') as f:
+    contenu_original = f.read()
+
+# Regex pour trouver chaque bloc .csv + Train : [(xmin, xmax, texte)]
+pattern_fichier_blocs = re.compile(
+    r'(?P<filename>\S+\.csv)\s*?\n\s*Train\s*:\s*(?P<train>\[.*?\])\s*?\n\s*Val\s*:\s*(?P<val>\[.*?\])\s*?\n\s*Test\s*:\s*(?P<test>\[.*?\])',
+    re.DOTALL
+)
+
+contenu_modifie = contenu_original
+
+for match in pattern_fichier_blocs.finditer(contenu_original):
+    filename = match.group('filename')
+    blocs = {
+        "Train": match.group('train'),
+        "Val": match.group('val'),
+        "Test": match.group('test'),
+    }
+
+    base_name = filename.split('-')[0]
+    
+    textgrid_path = os.path.join(dossier_textgrid, f"{base_name}.TextGrid")
+
+    if not os.path.exists(textgrid_path):
+        print(f"Fichier TextGrid manquant : {textgrid_path}")
+        continue
+
+    tg = Praat.TextGrid(textgrid_path)
+    tier_locuteur1 = tg.tiers[1].intervals
+    tier_locuteur2 = tg.tiers[4].intervals
+
+    nouvelles_lignes = [filename]
+
+    for split in ["Train", "Val", "Test"]:
+        try:
+            phrases = ast.literal_eval(blocs[split])
+        except Exception as e:
+            print(f"Erreur {split} pour {filename} : {e}")
+            nouvelles_lignes.append(f"{split} : {blocs[split]}")
+            continue
+
+        nouvelles_phrases = []
+
+        for xmin_phrase, xmax_phrase, _ in phrases:
+            mots = []
+
+            def extraire_mots(intervals):
+                for xmin_mot, xmax_mot, mot in intervals:
+                    if xmin_mot >= xmin_phrase and xmax_mot <= xmax_phrase:
+                        mot = mot.strip()
+                        if mot:
+                            mots.append(f'"{mot}"')
+                            output_mots.add(mot)
+
+            extraire_mots(tier_locuteur1)
+            extraire_mots(tier_locuteur2)
+
+            nouvelle_chaine = " ".join(mots)
+            nouvelles_phrases.append((xmin_phrase, xmax_phrase, nouvelle_chaine))
+
+        nouvelles_lignes.append(f"{split} : {repr(nouvelles_phrases)}")
+
+    # Remplacer bloc entier dans le texte original
+    bloc_original = match.group(0)
+    bloc_nouveau = "\n  ".join(nouvelles_lignes)
+    contenu_modifie = contenu_modifie.replace(bloc_original, bloc_nouveau)
+
+# Écriture du fichier modifié
+with open(fichier_txt, 'w', encoding='utf-8') as f_out:
+    f_out.write(contenu_modifie)
+
+
+# Enregistrer tous les mots extraits
+with open('res3_Random.txt', 'w', encoding='utf-8') as f_mots:
+    for mot in sorted(output_mots):
+        f_mots.write(f'"{mot}"\n')
+
+print("Traitement terminé pour Train, Val et Test.")