Using yt-dlp to download youtube transcript

Creating a replacement for yt or python youtube-dl with yt-dlp, AI vibe coding

jon allen

~10 min read · June 10, 2025 (Updated: June 10, 2025) · Free: Yes

I was having a bit of trouble with yt and decided to convert to yt-dlp

Here is a prompt I wrote for Gemini:

write a python script cli script that execute this subprocess

yt-dlp --write-subs --sub-format vtt  --skip-download <youtube video url>  -o /tmp/trans1.out

then find the output in the /tmp directory ( linux obviously )

then parse with vtt module.

output should be send to stdout.

Okay, here's a Python CLI script that does what you've requested.

First, you'll need to install the `webvtt-py` library if you haven't already:
```bash
pip install webvtt-py
```
And ensure `yt-dlp` is installed and in your system's PATH.

Now, here's the script:

```python
#!/usr/bin/env python3

import subprocess
import argparse
import sys
from pathlib import Path
import webvtt # For parsing VTT files (pip install webvtt-py)
import os

def main():
    """Download a video's subtitles with yt-dlp and print the parsed cues.

    Command-line arguments:
        youtube_url: URL of the video whose subtitles are fetched.
        --output-base: output template passed to yt-dlp's ``-o`` option
            (default ``/tmp/trans1.out``).
        --keep-file: keep the downloaded ``.vtt`` file instead of deleting it.

    For every cue, a ``start --> end`` line, the cue text collapsed onto a
    single line, and an empty line are written to stdout.  All diagnostic
    messages go to stderr.  The process exits with status 1 when the output
    directory cannot be created, yt-dlp fails or is not installed, no
    ``.vtt`` file is found, or the file cannot be parsed.
    """
    parser = argparse.ArgumentParser(
        description="Download YouTube video subtitles using yt-dlp, parse the VTT, and print to stdout."
    )
    parser.add_argument("youtube_url", help="The URL of the YouTube video.")
    parser.add_argument(
        "--output-base",
        default="/tmp/trans1.out",
        help="Base path for the temporary subtitle file (default: /tmp/trans1.out). "
             "yt-dlp will append language and .vtt extension to this base."
    )
    parser.add_argument(
        "--keep-file",
        action="store_true",
        help="Keep the downloaded .vtt file in /tmp instead of deleting it."
    )

    args = parser.parse_args()

    # The -o option for yt-dlp is an output *template*.
    # If you specify -o /tmp/trans1.out and --sub-format vtt,
    # yt-dlp will likely create something like /tmp/trans1.out.en.vtt
    output_base_path_template = Path(args.output_base)
    output_directory = output_base_path_template.parent
    # This is the part before yt-dlp adds ".lang.vtt"
    output_filename_stem_for_glob = output_base_path_template.name

    # Ensure the output directory exists (though /tmp usually does)
    try:
        output_directory.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory {output_directory}: {e}", file=sys.stderr)
        sys.exit(1)

    # Construct yt-dlp command
    command = [
        "yt-dlp",
        "--write-subs",
        "--sub-format", "vtt",
        "--skip-download",
        args.youtube_url,
        "-o", str(output_base_path_template)  # Pass the template path
    ]

    print(f"Executing: {' '.join(command)}", file=sys.stderr)
    try:
        # Run the command. Capture output for better error reporting.
        process = subprocess.run(command, check=True, capture_output=True, text=True)
        # Print yt-dlp's own output to stderr for diagnosis
        if process.stdout:
            print(f"yt-dlp STDOUT:\n{process.stdout.strip()}", file=sys.stderr)
        if process.stderr:
            print(f"yt-dlp STDERR:\n{process.stderr.strip()}", file=sys.stderr)

    except subprocess.CalledProcessError as e:
        print(f"Error executing yt-dlp. Return code: {e.returncode}", file=sys.stderr)
        if e.stdout:
            print(f"yt-dlp STDOUT:\n{e.stdout.strip()}", file=sys.stderr)
        if e.stderr:
            print(f"yt-dlp STDERR:\n{e.stderr.strip()}", file=sys.stderr)
        sys.exit(1)
    except FileNotFoundError:
        print("Error: yt-dlp command not found. Please ensure it's installed and in your PATH.", file=sys.stderr)
        sys.exit(1)

    # Find the downloaded VTT file(s)
    # yt-dlp will name it like <output_filename_stem_for_glob>.LANG.vtt
    # e.g., if output_base_path_template was /tmp/trans1.out, it might be /tmp/trans1.out.en.vtt

    # We search in the directory part of output_base_path_template
    # for files starting with the filename part, followed by anything, ending with .vtt
    glob_pattern = f"{output_filename_stem_for_glob}.*.vtt"
    vtt_files_found = list(output_directory.glob(glob_pattern))

    if not vtt_files_found:
        print(f"Error: No .vtt file found in {output_directory} matching pattern '{glob_pattern}'", file=sys.stderr)
        print("This could be because no subtitles were available or yt-dlp had an issue.", file=sys.stderr)
        sys.exit(1)

    # For simplicity, use the first VTT file found.
    # A more robust script might handle multiple languages or let the user choose.
    vtt_file_path = vtt_files_found[0]
    print(f"Found VTT file: {vtt_file_path}", file=sys.stderr)

    if len(vtt_files_found) > 1:
        print(f"Warning: Multiple VTT files found ({[str(f.name) for f in vtt_files_found]}). Processing the first one: {vtt_file_path.name}", file=sys.stderr)

    # Parse the VTT file
    try:
        vtt_content = webvtt.read(str(vtt_file_path))
    except Exception as e:
        print(f"Error parsing VTT file {vtt_file_path}: {e}", file=sys.stderr)
        # Clean up before exiting on parse error
        if not args.keep_file:
            try:
                os.remove(vtt_file_path)
            except OSError as del_e:
                print(f"Error deleting {vtt_file_path} after parse error: {del_e}", file=sys.stderr)
        sys.exit(1)

    # Output parsed content to stdout
    print(f"\n--- Parsed Subtitles from {vtt_file_path.name} ---", file=sys.stderr)  # Info to stderr
    for caption in vtt_content:
        # Output actual data to stdout
        print(f"{caption.start} --> {caption.end}")
        # VTT text can have newlines within a single cue.
        # Replace internal newlines with spaces for a cleaner single-line output per cue segment,
        # or print caption.text as is if multiline output for a single cue is desired.
        cleaned_text = caption.text.replace('\n', ' ').strip()
        print(cleaned_text)
        print()  # Extra newline for readability between cues

    # Clean up the VTT file unless asked to keep it
    if not args.keep_file:
        try:
            os.remove(vtt_file_path)
            print(f"Cleaned up {vtt_file_path}", file=sys.stderr)
        except OSError as e:
            print(f"Error deleting {vtt_file_path}: {e}", file=sys.stderr)
    else:
        print(f"Kept VTT file: {vtt_file_path}", file=sys.stderr)

if __name__ == "__main__":
    main()
```

**How to use it:**

1.  Save the code above as a Python file (e.g., `get_subs.py`).
2.  Make it executable: `chmod +x get_subs.py`
3.  Run it with a YouTube video URL:
    ```bash
    ./get_subs.py "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
    ```
    (Replace with a video URL that actually has subtitles for best results).

**Explanation:**

1.  **`#!/usr/bin/env python3`**: Shebang line to make the script executable.
2.  **`import ...`**: Imports necessary modules:
    *   `subprocess`: To run the `yt-dlp` command.
    *   `argparse`: To create a user-friendly command-line interface.
    *   `sys`: For `sys.exit()` and writing to `sys.stderr`.
    *   `pathlib.Path`: For modern, object-oriented path manipulation.
    *   `webvtt`: The library for parsing `.vtt` files.
    *   `os`: For removing the temporary file.
3.  **`argparse` Setup**:
    *   Defines a positional argument `youtube_url`.
    *   Adds an optional `--output-base` argument, defaulting to `/tmp/trans1.out`. This is important because `yt-dlp` uses its `-o` option as an *output template*, not a final filename. It will append the language code (e.g., `.en`) and the extension (`.vtt`) to this base.
    *   Adds an optional `--keep-file` flag to prevent deletion of the downloaded `.vtt` file.
4.  **`yt-dlp` Command Execution**:
    *   The `yt-dlp` command is constructed as a list of strings.
    *   `output_base_path_template` is passed to `yt-dlp`'s `-o` option.
    *   `subprocess.run()` executes the command.
        *   `check=True`: Raises a `CalledProcessError` if `yt-dlp` exits with a non-zero status.
        *   `capture_output=True, text=True`: Captures stdout and stderr from `yt-dlp` as text, which is useful for debugging.
    *   Error handling is included for `CalledProcessError` (if `yt-dlp` fails) and `FileNotFoundError` (if `yt-dlp` isn't installed).
5.  **Finding the Output File**:
    *   `yt-dlp` typically names subtitle files like `<output_template>.LANG.vtt` (e.g., `/tmp/trans1.out.en.vtt`).
    *   `output_directory.glob(f"{output_filename_stem_for_glob}.*.vtt")` is used to find files matching this pattern. `output_filename_stem_for_glob` would be `trans1.out` in the default case.
    *   The script takes the first file found. If a video has subtitles in multiple languages and `yt-dlp` downloads them all, this will pick one.
6.  **Parsing with `webvtt`**:
    *   `webvtt.read(str(vtt_file_path))` parses the found VTT file.
    *   It then iterates through `vtt_content.captions` (or just `vtt_content` as it's iterable).
    *   `caption.start`, `caption.end`, and `caption.text` provide the timing and content of each subtitle cue.
    *   The parsed information is printed to `stdout`. Diagnostic messages (like "Executing...", "Found VTT file...", etc.) are printed to `stderr` to keep `stdout` clean for potential piping.
7.  **Cleanup**:
    *   Unless `--keep-file` is specified, `os.remove(vtt_file_path)` deletes the downloaded VTT file from `/tmp`.

This script provides a robust way to achieve your goal, including error handling and clear feedback.

This didn't give what I wanted. The output had the time stamps

can we eliminate time stamps

00:43:56.840 --> 00:44:00.080
so at least I've got some snacks for the road.

what it came up with was the following:

KEY CHANGE:
The loop for printing parsed content now looks like this:

      
# Output parsed content (text only) to stdout
    print(f"\n--- Subtitle Text from {vtt_file_path.name} (timestamps omitted) ---", file=sys.stderr) # Info to stderr
    for caption in vtt_content:
        # VTT text can have newlines within a single cue.
        # Replace internal newlines with spaces for a cleaner single-line output per cue segment.
        cleaned_text = caption.text.replace('\n', ' ').strip()
        if cleaned_text: # Only print if there's actual text
            print(cleaned_text) # Output actual data (text only) to stdout

    

IGNORE_WHEN_COPYING_START
Use code with caution. Python
IGNORE_WHEN_COPYING_END

This will produce output like:

      
First line of subtitles.
Second line of subtitles, perhaps originally on multiple lines in VTT.
And so on.

Key change:

The loop for printing parsed content now looks like this:

      
# Output parsed content (text only) to stdout
    print(f"\n--- Subtitle Text from {vtt_file_path.name} (timestamps omitted) ---", file=sys.stderr) # Info to stderr
    for caption in vtt_content:
        # VTT text can have newlines within a single cue.
        # Replace internal newlines with spaces for a cleaner single-line output per cue segment.
        cleaned_text = caption.text.replace('\n', ' ').strip()
        if cleaned_text: # Only print if there's actual text
            print(cleaned_text) # Output actual data (text only) to stdout

    

IGNORE_WHEN_COPYING_START
Use code with caution. Python
IGNORE_WHEN_COPYING_END

This will produce output like:

      
First line of subtitles.
Second line of subtitles, perhaps originally on multiple lines in VTT.
And so on.

    

IGNORE_WHEN_COPYING_START
Use code with caution.
IGNORE_WHEN_COPYING_END

All diagnostic information will still go to stderr, so stdout will contain only the extracted subtitle text, with each caption's text on a new line.

    

IGNORE_WHEN_COPYING_START
Use code with caution.
IGNORE_WHEN_COPYING_END

All diagnostic information will still go to stderr, so stdout will contain only the extracted subtitle text, with each caption's text on a new line.

The output is like that of the original yt. There is a bit of noise from yt-dlp, but that goes to stderr; stdout contains only the transcript.

[jon2allen@jons-bad-ass-fedora-server-37 python]$ ./yt_trans_dlp.py https://www.youtube.com/watch?v=XmD1eGuX4GU
Executing: yt-dlp --write-subs --sub-format vtt --skip-download https://www.youtube.com/watch?v=XmD1eGuX4GU -o /tmp/trans1.out
yt-dlp STDOUT:
[youtube] Extracting URL: https://www.youtube.com/watch?v=XmD1eGuX4GU
[youtube] XmD1eGuX4GU: Downloading webpage
[youtube] XmD1eGuX4GU: Downloading tv client config
[youtube] XmD1eGuX4GU: Downloading tv player API JSON
[youtube] XmD1eGuX4GU: Downloading ios player API JSON
[youtube] XmD1eGuX4GU: Downloading player fc2a56a5-main
[youtube] XmD1eGuX4GU: Downloading m3u8 information
[info] XmD1eGuX4GU: Downloading subtitles: en-GB
[info] XmD1eGuX4GU: Downloading 1 format(s): 625+234
[info] Writing video subtitles to: /tmp/trans1.out.en-GB.vtt
[download] Destination: /tmp/trans1.out.en-GB.vtt

[download]    1.00KiB at  Unknown B/s (00:00:00)
[download]    3.00KiB at    2.79MiB/s (00:00:00)
[download]    7.00KiB at    4.58MiB/s (00:00:00)
[download]   15.00KiB at    7.42MiB/s (00:00:00)
[download]   31.00KiB at    5.13MiB/s (00:00:00)
[download]   63.00KiB at    7.00MiB/s (00:00:00)
[download]   75.50KiB at    7.54MiB/s (00:00:00)
[download] 100% of   75.50KiB in 00:00:00 at 1001.12KiB/s
yt-dlp STDERR:
WARNING: [youtube] Falling back to generic n function search
         player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
WARNING: [youtube] XmD1eGuX4GU: nsig extraction failed: Some formats may be missing
         n = yR01YPe3cSkdGq- ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
WARNING: [youtube] XmD1eGuX4GU: nsig extraction failed: Some formats may be missing
         n = fpjqJn25cjVvz0a ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
WARNING: [youtube] XmD1eGuX4GU: nsig extraction failed: Some formats may be missing
         n = t6eAKIAlX74Ovj9 ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
WARNING: [youtube] XmD1eGuX4GU: Some web client https formats have been skipped as they are missing a url. YouTube is forcing SABR streaming for this client. See  https://github.com/yt-dlp/yt-dlp/issues/12482  for more details
Found VTT file: /tmp/trans1.out.en-GB.vtt

--- Subtitle Text from trans1.out.en-GB.vtt (timestamps omitted) ---
In a land far, far away.
For the fifth time, I embarked on a journey of a lifetime,

You can alias the yt command in Linux:

alias yt='/home/jon2allen/python/yt_trans_dlp.py'

And there you have a complete replacement for yt. There could be some minor enhancements, such as including the process ID (PID) in the output file name so that several copies of the script can run concurrently.

Here is the final code:

#!/usr/bin/env python3

import subprocess
import argparse
import sys
from pathlib import Path
import webvtt # For parsing VTT files (pip install webvtt-py)
import os

def _stat_signatures(directory, pattern):
    """Return ``{path: signature}`` for files in *directory* matching *pattern*.

    The signature combines inode number, size and timestamps, so comparing two
    snapshots reveals which files were created or rewritten in between.
    """
    signatures = {}
    for path in directory.glob(pattern):
        try:
            info = path.stat()
        except OSError:
            # The file disappeared between listing and stat; ignore it.
            continue
        signatures[path] = (info.st_ino, info.st_size, info.st_mtime_ns, info.st_ctime_ns)
    return signatures


def _remove_vtt(vtt_file_path, context=""):
    """Delete *vtt_file_path*; report a failure on stderr and return success."""
    try:
        os.remove(vtt_file_path)
    except OSError as e:
        print(f"Error deleting {vtt_file_path}{context}: {e}", file=sys.stderr)
        return False
    return True


def main():
    """Download a video's subtitles with yt-dlp and print only their text.

    Command-line arguments:
        youtube_url: URL of the video whose subtitles are fetched.
        --output-base: output template passed to yt-dlp's ``-o`` option
            (default ``/tmp/trans1.out``).
        --keep-file: keep the downloaded ``.vtt`` file instead of deleting it.

    The text of every non-empty cue is written to stdout, one cue per line,
    without timestamps.  All diagnostic messages go to stderr.  The process
    exits with status 1 when the output directory cannot be created, yt-dlp
    fails or is not installed, no ``.vtt`` file is found, or the file cannot
    be parsed.
    """
    import glob  # local import: only needed to escape the glob pattern

    parser = argparse.ArgumentParser(
        description="Download YouTube video subtitles using yt-dlp, parse the VTT, and print only the text to stdout."
    )
    parser.add_argument("youtube_url", help="The URL of the YouTube video.")
    parser.add_argument(
        "--output-base",
        default="/tmp/trans1.out",
        help="Base path for the temporary subtitle file (default: /tmp/trans1.out). "
             "yt-dlp will append language and .vtt extension to this base."
    )
    parser.add_argument(
        "--keep-file",
        action="store_true",
        help="Keep the downloaded .vtt file in /tmp instead of deleting it."
    )

    args = parser.parse_args()

    output_base_path_template = Path(args.output_base)
    output_directory = output_base_path_template.parent
    output_filename_stem_for_glob = output_base_path_template.name

    try:
        output_directory.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory {output_directory}: {e}", file=sys.stderr)
        sys.exit(1)

    # The subtitle file is expected at <base>.<lang>.vtt.  Escape the base so
    # that glob metacharacters in --output-base are matched literally.
    glob_pattern = f"{glob.escape(output_filename_stem_for_glob)}.*.vtt"

    # Snapshot matching files *before* the download so that leftovers from
    # earlier runs (e.g. kept with --keep-file under another language code)
    # can be told apart from what this run produces.
    signatures_before = _stat_signatures(output_directory, glob_pattern)

    command = [
        "yt-dlp",
        "--write-subs",
        "--sub-format", "vtt",
        "--skip-download",
        args.youtube_url,
        "-o", str(output_base_path_template)
    ]

    print(f"Executing: {' '.join(command)}", file=sys.stderr)
    try:
        process = subprocess.run(command, check=True, capture_output=True, text=True)
        if process.stdout:
            print(f"yt-dlp STDOUT:\n{process.stdout.strip()}", file=sys.stderr)
        if process.stderr:
            print(f"yt-dlp STDERR:\n{process.stderr.strip()}", file=sys.stderr)

    except subprocess.CalledProcessError as e:
        print(f"Error executing yt-dlp. Return code: {e.returncode}", file=sys.stderr)
        if e.stdout:
            print(f"yt-dlp STDOUT:\n{e.stdout.strip()}", file=sys.stderr)
        if e.stderr:
            print(f"yt-dlp STDERR:\n{e.stderr.strip()}", file=sys.stderr)
        sys.exit(1)
    except FileNotFoundError:
        print("Error: yt-dlp command not found. Please ensure it's installed and in your PATH.", file=sys.stderr)
        sys.exit(1)

    signatures_after = _stat_signatures(output_directory, glob_pattern)

    # Prefer files that this run created or rewrote; sort for a stable choice.
    vtt_files_found = sorted(
        path
        for path, signature in signatures_after.items()
        if signatures_before.get(path) != signature
    )
    if not vtt_files_found:
        # Nothing changed on disk.  Fall back to whatever matches so a file
        # that yt-dlp deliberately left untouched can still be used, but make
        # it obvious that the content may be stale.
        vtt_files_found = sorted(signatures_after)
        if vtt_files_found:
            print(
                "Warning: no .vtt file was created or modified by this run; "
                "using existing file(s), which may belong to a different video.",
                file=sys.stderr,
            )

    if not vtt_files_found:
        print(f"Error: No .vtt file found in {output_directory} matching pattern '{glob_pattern}'", file=sys.stderr)
        print("This could be because no subtitles were available or yt-dlp had an issue.", file=sys.stderr)
        sys.exit(1)

    vtt_file_path = vtt_files_found[0]
    print(f"Found VTT file: {vtt_file_path}", file=sys.stderr)

    if len(vtt_files_found) > 1:
        print(f"Warning: Multiple VTT files found ({[str(f.name) for f in vtt_files_found]}). Processing the first one: {vtt_file_path.name}", file=sys.stderr)

    try:
        vtt_content = webvtt.read(str(vtt_file_path))
    except Exception as e:
        # Top-level boundary: report any parser failure and exit cleanly.
        print(f"Error parsing VTT file {vtt_file_path}: {e}", file=sys.stderr)
        if not args.keep_file:
            _remove_vtt(vtt_file_path, " after parse error")
        sys.exit(1)

    # Output parsed content (text only) to stdout
    print(f"\n--- Subtitle Text from {vtt_file_path.name} (timestamps omitted) ---", file=sys.stderr)  # Info to stderr
    for caption in vtt_content:
        # VTT text can have newlines within a single cue.
        # Replace internal newlines with spaces for a cleaner single-line output per cue segment.
        cleaned_text = caption.text.replace('\n', ' ').strip()
        if cleaned_text:  # Only print if there's actual text
            print(cleaned_text)  # Output actual data (text only) to stdout

    if args.keep_file:
        print(f"Kept VTT file: {vtt_file_path}", file=sys.stderr)
    elif _remove_vtt(vtt_file_path):
        print(f"Cleaned up {vtt_file_path}", file=sys.stderr)

if __name__ == "__main__":
    main()

#ai #prompting-technique #python #youtube #transcript

< Go to the original

Using yt-dlp to download youtube transcript

Creating a replacement for yt or python youtube-dl with yt-dlp, AI vibe coding

Reporting a Problem