Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Q
qbe
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lifo
A
Anaïs Halftermeyer
queryByExample
qbe
Commits
d18626fe
Commit
d18626fe
authored
Jun 26, 2025
by
Elias
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
script added
parent
2aa2104e
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
82 additions
and
0 deletions
+82
-0
TrainValTest3_Random.py
TrainValTest3_Random.py
+82
-0
No files found.
TrainValTest3_Random.py
0 → 100755
View file @
d18626fe
import
re
import
ast
import
os
from
pympi
import
Praat
# Input/output locations (relative to the current working directory).
dossier_textgrid = 'textGrid'
fichier_txt = 'resTrainValTest2_Random.txt'
fichier_txt_backup = 'resTrainValTest2_Random_backup.txt'

# Every distinct word label collected while rewriting the Train blocks.
output_mots = set()

# Positions, inside ``TextGrid.tiers``, of the two tiers whose intervals are
# scanned for words (first speaker, then second speaker).
# NOTE(review): presumably these are word-level tiers -- confirm against the
# actual TextGrid layout.
indices_tiers_locuteurs = (1, 4)

# Matches one "<name>.csv  Train : [(xmin, xmax, text), ...]" block.
regex_bloc = re.compile(
    r'(?P<nom_csv>\S+\.csv)\s*Train\s*:\s*(?P<train_data>\[\(.*?\)\])',
    re.DOTALL,
)


def extraire_mots(intervals, xmin_phrase, xmax_phrase):
    """Return the labels of the intervals lying inside a phrase.

    Args:
        intervals: iterable of ``(xmin, xmax, label)`` tuples.
        xmin_phrase: lower bound of the phrase (inclusive).
        xmax_phrase: upper bound of the phrase (inclusive).

    Returns:
        The stripped, non-empty labels of every interval fully contained in
        ``[xmin_phrase, xmax_phrase]``, in input order.
    """
    mots = []
    for xmin_mot, xmax_mot, mot in intervals:
        if xmin_mot >= xmin_phrase and xmax_mot <= xmax_phrase:
            mot = mot.strip()
            if mot:
                mots.append(mot)
    return mots


def charger_tiers_locuteurs(textgrid_path):
    """Load the speaker tiers of a TextGrid file.

    Args:
        textgrid_path: path of the ``.TextGrid`` file to read.

    Returns:
        A list with the ``intervals`` of each tier listed in
        ``indices_tiers_locuteurs``, or ``None`` (after printing a message)
        when the file does not exist.

    Raises:
        IndexError: if the TextGrid has fewer tiers than the indices require.
    """
    if not os.path.exists(textgrid_path):
        print(f"Fichier TextGrid manquant : {textgrid_path}")
        return None
    tg = Praat.TextGrid(textgrid_path)
    return [tg.tiers[indice].intervals for indice in indices_tiers_locuteurs]


def reecrire_blocs_train(contenu, charger_tiers=charger_tiers_locuteurs,
                         mots_vus=None):
    """Rewrite the text of every Train phrase from the TextGrid word tiers.

    For each block matched by ``regex_bloc``, the third element of every
    ``(xmin, xmax, text)`` tuple is replaced by the double-quoted,
    space-separated words found between ``xmin`` and ``xmax`` in the speaker
    tiers. The rewritten block is emitted as ``"<name>.csv Train : <list>"``
    (single spaces around ``Train``). Blocks whose TextGrid is missing or
    whose list cannot be parsed are left untouched.

    Args:
        contenu: full text of the results file.
        charger_tiers: callable taking a TextGrid path and returning a list
            of interval lists, or ``None`` to skip the block.
        mots_vus: set receiving every extracted word; defaults to the
            module-level ``output_mots``.

    Returns:
        The rewritten text.
    """
    if mots_vus is None:
        mots_vus = output_mots

    def remplacer(match):
        nom_csv = match.group('nom_csv')
        base_name = nom_csv.split('-')[0]
        textgrid_path = os.path.join(dossier_textgrid, f"{base_name}.TextGrid")

        tiers = charger_tiers(textgrid_path)
        if tiers is None:
            return match.group(0)

        try:
            phrases = ast.literal_eval(match.group('train_data'))
        except (ValueError, TypeError, SyntaxError,
                MemoryError, RecursionError) as e:
            print(f"Erreur Train pour {nom_csv} : {e}")
            return match.group(0)

        nouvelles_phrases = []
        for xmin_phrase, xmax_phrase, _ in phrases:
            mots = []
            for intervals in tiers:
                mots.extend(extraire_mots(intervals, xmin_phrase, xmax_phrase))
            mots_vus.update(mots)
            nouvelle_chaine = " ".join(f'"{mot}"' for mot in mots)
            nouvelles_phrases.append((xmin_phrase, xmax_phrase, nouvelle_chaine))

        return f"{nom_csv} Train : {repr(nouvelles_phrases)}"

    # Substituting by match position (instead of a global ``str.replace`` of
    # the matched text) guarantees that each block is rewritten exactly once
    # and that a block whose text is a substring of another one cannot
    # clobber it.
    return regex_bloc.sub(remplacer, contenu)


def main():
    """Back up the results file, rewrite its Train blocks, dump the words."""
    with open(fichier_txt, 'r', encoding='utf-8') as f:
        contenu_original = f.read()

    # Save the original file.
    # NOTE: the backup is overwritten on every run, so a second run replaces
    # it with the already-modified content.
    with open(fichier_txt_backup, 'w', encoding='utf-8') as f_backup:
        f_backup.write(contenu_original)

    contenu_modifie = reecrire_blocs_train(contenu_original)

    # Write the modified text file back.
    with open(fichier_txt, 'w', encoding='utf-8') as f_modif:
        f_modif.write(contenu_modifie)

    # Write the extracted words, sorted, one quoted word per line.
    with open('res3_Random.txt', 'w', encoding='utf-8') as f_mots:
        for mot in sorted(output_mots):
            f_mots.write(f'"{mot}"\n')

    print("Fichier mis à jour :", fichier_txt)
    print("Sauvegarde créée :", fichier_txt_backup)
    print("Mots extraits dans res3_Random.txt")


if __name__ == "__main__":
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment