Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Q
qbe
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lifo
A
Anaïs Halftermeyer
queryByExample
qbe
Commits
31dc3aa4
Commit
31dc3aa4
authored
Jun 26, 2025
by
Elias
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
script added
parent
d18626fe
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
87 additions
and
0 deletions
+87
-0
TrainValTest3_bis_Random.py
TrainValTest3_bis_Random.py
+87
-0
No files found.
TrainValTest3_bis_Random.py
0 → 100644
View file @
31dc3aa4
import
re
import
ast
import
os
from
pympi
import
Praat
dossier_textgrid
=
'textGrid'
fichier_txt
=
'resTrainValTest2_Random_backup.txt'
output_mots
=
set
()
with
open
(
fichier_txt
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
contenu_original
=
f
.
read
()
# Regex pour trouver chaque bloc .csv + Train : [(xmin, xmax, texte)]
pattern_fichier_blocs
=
re
.
compile
(
r'(?P<filename>\S+\.csv)\s*?\n\s*Train\s*:\s*(?P<train>\[.*?\])\s*?\n\s*Val\s*:\s*(?P<val>\[.*?\])\s*?\n\s*Test\s*:\s*(?P<test>\[.*?\])'
,
re
.
DOTALL
)
contenu_modifie
=
contenu_original
for
match
in
pattern_fichier_blocs
.
finditer
(
contenu_original
):
filename
=
match
.
group
(
'filename'
)
blocs
=
{
"Train"
:
match
.
group
(
'train'
),
"Val"
:
match
.
group
(
'val'
),
"Test"
:
match
.
group
(
'test'
),
}
base_name
=
filename
.
split
(
'-'
)[
0
]
textgrid_path
=
os
.
path
.
join
(
dossier_textgrid
,
f
"{base_name}.TextGrid"
)
if
not
os
.
path
.
exists
(
textgrid_path
):
print
(
f
"Fichier TextGrid manquant : {textgrid_path}"
)
continue
tg
=
Praat
.
TextGrid
(
textgrid_path
)
tier_locuteur1
=
tg
.
tiers
[
1
]
.
intervals
tier_locuteur2
=
tg
.
tiers
[
4
]
.
intervals
nouvelles_lignes
=
[
filename
]
for
split
in
[
"Train"
,
"Val"
,
"Test"
]:
try
:
phrases
=
ast
.
literal_eval
(
blocs
[
split
])
except
Exception
as
e
:
print
(
f
"Erreur {split} pour {filename} : {e}"
)
nouvelles_lignes
.
append
(
f
"{split} : {blocs[split]}"
)
continue
nouvelles_phrases
=
[]
for
xmin_phrase
,
xmax_phrase
,
_
in
phrases
:
mots
=
[]
def
extraire_mots
(
intervals
):
for
xmin_mot
,
xmax_mot
,
mot
in
intervals
:
if
xmin_mot
>=
xmin_phrase
and
xmax_mot
<=
xmax_phrase
:
mot
=
mot
.
strip
()
if
mot
:
mots
.
append
(
f
'"{mot}"'
)
output_mots
.
add
(
mot
)
extraire_mots
(
tier_locuteur1
)
extraire_mots
(
tier_locuteur2
)
nouvelle_chaine
=
" "
.
join
(
mots
)
nouvelles_phrases
.
append
((
xmin_phrase
,
xmax_phrase
,
nouvelle_chaine
))
nouvelles_lignes
.
append
(
f
"{split} : {repr(nouvelles_phrases)}"
)
# Remplacer bloc entier dans le texte original
bloc_original
=
match
.
group
(
0
)
bloc_nouveau
=
"
\n
"
.
join
(
nouvelles_lignes
)
contenu_modifie
=
contenu_modifie
.
replace
(
bloc_original
,
bloc_nouveau
)
# Écriture du fichier modifié
with
open
(
fichier_txt
,
'w'
,
encoding
=
'utf-8'
)
as
f_out
:
f_out
.
write
(
contenu_modifie
)
# Enregistrer tous les mots extraits
with
open
(
'res3_Random.txt'
,
'w'
,
encoding
=
'utf-8'
)
as
f_mots
:
for
mot
in
sorted
(
output_mots
):
f_mots
.
write
(
f
'"{mot}"
\n
'
)
print
(
"Traitement terminé pour Train, Val et Test."
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment