Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Q
qbe
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lifo
A
Anaïs Halftermeyer
queryByExample
qbe
Commits
6dd42792
Commit
6dd42792
authored
Sep 16, 2025
by
Elias
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
get phonems
parent
0f33087e
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
89 additions
and
0 deletions
+89
-0
README.md
README.md
+7
-0
transcriptionAfterMatching1_fichiers.py
transcriptionAfterMatching1_fichiers.py
+82
-0
No files found.
README.md
View file @
6dd42792
...
@@ -104,6 +104,13 @@ python3 decouper_wav_mots_uniquement.py annotation file "mot1" "mot2" "mot3" ...
...
@@ -104,6 +104,13 @@ python3 decouper_wav_mots_uniquement.py annotation file "mot1" "mot2" "mot3" ...
python3 matchQueryHS.py /path/to/your/turn/of/speech/npz /path/to/your/queries/npz /path/to/the/output/folder
--W1
1
--W2
1
python3 matchQueryHS.py /path/to/your/turn/of/speech/npz /path/to/your/queries/npz /path/to/the/output/folder
--W1
1
--W2
1
```
```
### • This script gets the phonemes of the pairs which are matched
**transcriptionAfterMatching1_fichiers.py :**
Output : csv files with transcriptions
```
bash
python3 transcriptionAfterMatching1_fichiers.py /path/to/your/matched/pairs /output/dir textGrid/
```
# Tools
# Tools
### • Counting the number of segments
### • Counting the number of segments
...
...
transcriptionAfterMatching1_fichiers.py
0 → 100644
View file @
6dd42792
import
os
import
csv
import
re
from
pathlib
import
Path
from
textgrid
import
TextGrid
import
argparse
def extract_phonemes_from_textgrid(tg_path, abs_debut, abs_fin, tier_index):
    """Return the non-empty labels of one tier that overlap a time window.

    The TextGrid at ``tg_path`` is loaded, the tier at position
    ``tier_index`` is selected, and every interval that strictly overlaps
    the open window ``(abs_debut, abs_fin)`` contributes its stripped label,
    in tier order. Intervals whose label is empty after stripping are
    dropped.

    Args:
        tg_path: Path of the TextGrid file to read.
        abs_debut: Start of the window, on the TextGrid's time axis.
        abs_fin: End of the window, on the TextGrid's time axis.
        tier_index: Position of the tier to read inside the TextGrid.

    Returns:
        A list of label strings; an empty list (after printing a message)
        when ``tier_index`` is past the last tier.
    """
    grid = TextGrid.fromFile(tg_path)

    # Guard clause: the requested tier must exist in this file.
    if not tier_index < len(grid):
        print(f"Tier {tier_index} introuvable dans {tg_path}")
        return []

    # Lazily strip the label of each interval that intersects the window;
    # an interval merely touching a window boundary does not count.
    stripped_marks = (
        segment.mark.strip()
        for segment in grid[tier_index].intervals
        if segment.maxTime > abs_debut and segment.minTime < abs_fin
    )
    return [mark for mark in stripped_marks if mark]
def process_csv(input_csv: Path, output_csv: Path, textgrid_dir: Path):
    """Write the phoneme transcription of every matched segment of a CSV.

    ``input_csv`` is read as a header line followed by rows of five columns
    (file name, t0, tn, tokens, score); only the first three are used. For
    each row:

    * the segment start is parsed from the file name, which must contain
      ``_<start>_<end>_feature`` with both values written as decimals;
    * the absolute window is ``start + t0`` .. ``start + tn`` (t0/tn are
      therefore offsets relative to the segment start; presumably seconds,
      to be confirmed against the matching script);
    * the TextGrid ``<speaker_id>.TextGrid`` is looked up in
      ``textgrid_dir``, where ``speaker_id`` is the part of the file name
      before the first ``_`` and then before the first ``-``;
    * the phonemes overlapping the window are joined with ``-`` and written
      next to the ``.wav`` name derived from the file name.

    Rows that cannot be processed (wrong column count, non-numeric times,
    unparsable file name, missing TextGrid) are reported on stdout and
    skipped instead of aborting the whole file. An empty input file yields
    an output containing only the header.

    Args:
        input_csv: CSV of matched pairs to read.
        output_csv: CSV to create (overwritten if present) with the columns
            ``file`` and ``phonemes``.
        textgrid_dir: Directory holding the TextGrid files.
    """
    # Compiled once: the same pattern is applied to every row.
    span_pattern = re.compile(r"_(\d+\.\d+)_(\d+\.\d+)_feature")

    with open(input_csv, newline='', encoding='utf-8') as csv_in, \
            open(output_csv, 'w', newline='', encoding='utf-8') as csv_out:
        reader = csv.reader(csv_in)
        writer = csv.writer(csv_out)

        writer.writerow(["file", "phonemes"])

        # Skip the input header. The default keeps an empty input file from
        # raising StopIteration: there is simply nothing left to process.
        if next(reader, None) is None:
            return

        for row in reader:
            # csv.reader yields [] for blank lines; they carry no data.
            if not row:
                continue
            if len(row) != 5:
                print(f"[ERREUR] Ligne ignorée (5 colonnes attendues) : {row}")
                continue
            file_name, t0_str, tn_str = row[:3]

            try:
                t0 = float(t0_str)
                tn = float(tn_str)
            except ValueError:
                print(f"[ERREUR] Temps invalides pour {file_name} : "
                      f"{t0_str!r}, {tn_str!r}")
                continue

            # Extract the segment start from the file name.
            match = span_pattern.search(file_name)
            if not match:
                print(f"[ERREUR] Impossible d'extraire start/end depuis : {file_name}")
                continue

            # Only the start is needed: both window bounds are offsets from it.
            start_csv = float(match.group(1))
            abs_debut = start_csv + t0
            abs_fin = start_csv + tn

            # Matching TextGrid.
            speaker_id = file_name.split("_")[0].split("-")[0]
            tg_file = textgrid_dir / f"{speaker_id}.TextGrid"
            if not tg_file.exists():
                print(f"[ERREUR] TextGrid introuvable : {tg_file}")
                continue

            # Files whose name contains "YBA" are read from tier 5, all
            # others from tier 2 (corpus-specific layout; TODO confirm).
            tier_index = 5 if "YBA" in file_name else 2
            phonemes = extract_phonemes_from_textgrid(
                tg_file, abs_debut, abs_fin, tier_index
            )
            phoneme_str = "-".join(phonemes)

            wav_name = file_name.replace("_feature", ".wav")
            writer.writerow([wav_name, phoneme_str])
            print(f"Écrit : {wav_name}, {phoneme_str}")
if __name__ == "__main__":
    # Command-line entry point: convert every CSV found directly inside the
    # input directory into a same-named CSV in the output directory.
    parser = argparse.ArgumentParser(
        description="Extraire phonèmes depuis TextGrid pour chaque CSV."
    )
    # The three positional arguments are all directories, declared in the
    # order they must be given on the command line.
    positional_specs = (
        ("input_dir", "Dossier contenant les fichiers CSV en entrée"),
        ("output_dir", "Dossier où stocker les CSV transformés"),
        ("textgrid_dir", "Dossier contenant les fichiers TextGrid"),
    )
    for arg_name, arg_help in positional_specs:
        parser.add_argument(arg_name, type=Path, help=arg_help)
    args = parser.parse_args()

    # Create the output directory (and any missing parents) up front.
    args.output_dir.mkdir(parents=True, exist_ok=True)

    # Materialised as a list because the count is reported before processing.
    csv_files = [*args.input_dir.glob("*.csv")]
    print(f"Trouvé {len(csv_files)} fichiers CSV dans {args.input_dir}")

    for csv_file in csv_files:
        # Each result keeps the name of the CSV it was computed from.
        output_csv = args.output_dir / csv_file.name
        print(f"Processing {csv_file} -> {output_csv}")
        process_csv(csv_file, output_csv, args.textgrid_dir)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment