Spyke

Posts

Bash script to download and search YouTube subtitles and output clickable, timestamped URLs

cross-posted from: https://lemm.ee/post/23155648

Here is the script.


#!/usr/bin/env bash
# Download and search YouTube subtitles
# Dependencies: yt-dlp, awk, perl, and at least one of ugrep, ripgrep, or grep
# Usage: script youtube_url


# Entry point: run the download-and-search pipeline for one video URL.
# Globals:   url (written) - all arguments joined into a single string
# Arguments: $@ - the video URL
main() {
    # "$*" makes the join-into-one-string explicit; assigning "$@" to a
    # scalar is ambiguous. check_if_url rejects any whitespace afterwards.
    url="$*"
    check_if_url
    get_video_id
    search_for_downloaded_matching_files
    set_download_boolean_flag
    download_subs
    read_and_format_transcript_file
    echo_description_file
    user_search
}


# Validate that the global $url is a single https:// URL with no whitespace.
# Globals:  url (read)
# Outputs:  an error message on stderr when validation fails
# Exits:    1 when $url does not match the regex
check_if_url() {
    local regex='^https://[^[:space:]]+$'
    if ! [[ $url =~ $regex ]]; then
        # Diagnostics belong on stderr so they never mix with search output
        echo "Invalid input. Valid input is a url matching regex ${regex}" >&2
        exit 1
    fi
}


# Extract the video id from $url into the global $video_id.
# Handles ?v=ID / &v=ID query parameters as well as youtu.be/ID, /shorts/ID,
# /embed/ID and /live/ID paths. $video_id is left empty when nothing matches.
# Globals: url (read), video_id (written)
get_video_id() {
    # Regexes live in variables so bash does not try to parse & or ? itself
    local re_query='[?&]v=([^&#]+)'
    local re_path='(youtu\.be/|/shorts/|/embed/|/live/)([^?&#/]+)'
    local re_legacy='.*v=([^&]*)'

    video_id=""
    if [[ $url =~ $re_query ]]; then
        video_id="${BASH_REMATCH[1]}"
    elif [[ $url =~ $re_path ]]; then
        video_id="${BASH_REMATCH[2]}"
    elif [[ $url =~ $re_legacy ]]; then
        # Fallback kept for any URL shape the previous sed expression accepted
        video_id="${BASH_REMATCH[1]}"
    fi
}


# Print the newest file in $PWD whose name contains $video_id and ends in .$1.
# Prints nothing when no file matches.
_newest_file_for_video() {
    local ext=$1 listing
    # Sorting by creation time needs a recent GNU ls; fall back to the
    # default (modification time) ordering where that option is rejected.
    listing=$(command ls -t --time=creation -- "$PWD"/*"${video_id}"*."${ext}" 2>/dev/null) ||
        listing=$(command ls -t -- "$PWD"/*"${video_id}"*."${ext}" 2>/dev/null)
    # Keep only the first (newest) line
    printf '%s' "${listing%%$'\n'*}"
}

# Locate previously downloaded subtitle and description files for the video.
# Globals: video_id (read), transcript_file (written), description_file (written)
search_for_downloaded_matching_files() {
    transcript_file="$(_newest_file_for_video vtt)"
    description_file="$(_newest_file_for_video description)"
}


# Decide whether a download is needed.
# Sets the global $download to 0 when both the transcript and description
# files are already known, and to 1 otherwise.
set_download_boolean_flag() {
    download=1 # TRUE: assume a download is required
    if [[ -n $transcript_file && -n $description_file ]]; then
        download=0 # FALSE: both files are already present
    fi
}


# Fetch subtitles and the description with yt-dlp when $download is 1,
# then refresh the file lookup so the new files are picked up.
# Globals: download (read), url (read)
download_subs() {
    # Skip everything unless the download flag is set
    [ "$download" -eq 1 ] || return 0

    # One yt-dlp run per artifact: auto-generated subs, manual subs, description
    yt-dlp --restrict-filenames --write-auto-sub --skip-download "${url}"
    yt-dlp --restrict-filenames --sub-langs=eng --write-subs --skip-download "${url}"
    yt-dlp --restrict-filenames --write-description --skip-download "${url}"

    # The files exist only now, so look them up again
    search_for_downloaded_matching_files
}


read_and_format_transcript_file() {
    perl_removed_dupes="$(perl -0777 -pe 's/^\d\d.*\n.*\n.*<\/c>//gm' <"${transcript_file}")"
    local prefix="https://www.youtube.com/watch?v=%24%7Bvideo_id%7D&t="
    local suffix="s"
    formated_transcript_file="$(awk -v pre="$prefix" -v suf="$suffix" '
    /^([0-9]{2}:){2}[0-9]{2}\.[0-9]{3}/ {
        split($1, a, /[:.]/);
        $1 = pre (int(a[1]*3600 + a[2]*60 + a[3]) - 3) suf;
        sub(/ --> [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3}/, "");
        sub(/ align:start position:0%$/, "");
        print;
        next;
    }
    {
        sub(/ align:start position:0%$/, "");
        print;
    }
    ' <<<"${perl_removed_dupes}")"
    #CRLF for ugrep to avoid ?bug? where before lines are not all outputted
    formated_transcript_file_CRLF=$(printf '%b' "$formated_transcript_file" | sed 's/$/\r/')
}


# Print the video description that yt-dlp saved, if one is available.
# Globals: description_file (read)
# Outputs: the description on stdout, or a notice on stderr when it is missing
echo_description_file() {
    if [[ -r ${description_file} ]]; then
        cat "${description_file}"
    else
        # Avoid running cat on an empty or missing path
        echo "No description file available" >&2
    fi
}


# Prompt for a regex and search the formatted transcript with every available
# tool: ugrep and ripgrep when installed, plain grep only when neither is.
# Globals: formated_transcript_file, formated_transcript_file_CRLF (read),
#          search_term (written)
# Outputs: search results on stdout
user_search() {
    # Local counter so a stray app_count in the environment cannot
    # suppress the grep fallback
    local app_count=0

    echo -e "\n\n"
    read -rp "Enter regex (read as raw input): " search_term

    if command -v ug >/dev/null 2>&1; then
        echo -e "\n\n\n\n"
        echo "Ugrep output"
        ug --pretty=never -B2 -A1 -i -Z+-~1 -e "${search_term}" --andnot "^https?:\/\/"  <<<"$formated_transcript_file_CRLF"
        app_count=$((app_count + 1))
    fi

    if command -v rg >/dev/null 2>&1; then
        echo -e "\n\n\n\n"
        echo "Ripgrep output"
        rg -iP -B2 -A7 "^(?!https?:\/\/).*\K${search_term}" <<<"$formated_transcript_file"
        app_count=$((app_count + 1))
    fi

    if [ "$app_count" -eq 0 ]; then
        echo -e "\n\n\n\n"
        echo "Grep output"
        # -e keeps a pattern that starts with "-" from being read as an option
        grep -iP -B2 -A1 -e "${search_term}" <<<"$formated_transcript_file"
        echo -e "\n\n"
        # Ending on echo (not an arithmetic command that evaluates to 0)
        # keeps the function's exit status at 0 on this path
        echo "Consider installing ripgrep and ugrep for better search"
    fi
}


# Script entry point: hand every command-line argument to main
main "$@"


    
View original on lemm.ee

Bash script to download and search YouTube subtitles and output clickable, timestamped URLs

Here is the script.


#!/usr/bin/env bash
# Download and search YouTube subtitles
# Dependencies: yt-dlp, awk, perl, and at least one of ugrep, ripgrep, or grep
# Usage: script youtube_url


# Entry point: run the download-and-search pipeline for one video URL.
# Globals:   url (written) - all arguments joined into a single string
# Arguments: $@ - the video URL
main() {
    # "$*" makes the join-into-one-string explicit; assigning "$@" to a
    # scalar is ambiguous. check_if_url rejects any whitespace afterwards.
    url="$*"
    check_if_url
    get_video_id
    search_for_downloaded_matching_files
    set_download_boolean_flag
    download_subs
    read_and_format_transcript_file
    echo_description_file
    user_search
}


# Validate that the global $url is a single https:// URL with no whitespace.
# Globals:  url (read)
# Outputs:  an error message on stderr when validation fails
# Exits:    1 when $url does not match the regex
check_if_url() {
    local regex='^https://[^[:space:]]+$'
    if ! [[ $url =~ $regex ]]; then
        # Diagnostics belong on stderr so they never mix with search output
        echo "Invalid input. Valid input is a url matching regex ${regex}" >&2
        exit 1
    fi
}


# Extract the video id from $url into the global $video_id.
# Handles ?v=ID / &v=ID query parameters as well as youtu.be/ID, /shorts/ID,
# /embed/ID and /live/ID paths. $video_id is left empty when nothing matches.
# Globals: url (read), video_id (written)
get_video_id() {
    # Regexes live in variables so bash does not try to parse & or ? itself
    local re_query='[?&]v=([^&#]+)'
    local re_path='(youtu\.be/|/shorts/|/embed/|/live/)([^?&#/]+)'
    local re_legacy='.*v=([^&]*)'

    video_id=""
    if [[ $url =~ $re_query ]]; then
        video_id="${BASH_REMATCH[1]}"
    elif [[ $url =~ $re_path ]]; then
        video_id="${BASH_REMATCH[2]}"
    elif [[ $url =~ $re_legacy ]]; then
        # Fallback kept for any URL shape the previous sed expression accepted
        video_id="${BASH_REMATCH[1]}"
    fi
}


# Print the newest file in $PWD whose name contains $video_id and ends in .$1.
# Prints nothing when no file matches.
_newest_file_for_video() {
    local ext=$1 listing
    # Sorting by creation time needs a recent GNU ls; fall back to the
    # default (modification time) ordering where that option is rejected.
    listing=$(command ls -t --time=creation -- "$PWD"/*"${video_id}"*."${ext}" 2>/dev/null) ||
        listing=$(command ls -t -- "$PWD"/*"${video_id}"*."${ext}" 2>/dev/null)
    # Keep only the first (newest) line
    printf '%s' "${listing%%$'\n'*}"
}

# Locate previously downloaded subtitle and description files for the video.
# Globals: video_id (read), transcript_file (written), description_file (written)
search_for_downloaded_matching_files() {
    transcript_file="$(_newest_file_for_video vtt)"
    description_file="$(_newest_file_for_video description)"
}


# Decide whether a download is needed.
# Sets the global $download to 0 when both the transcript and description
# files are already known, and to 1 otherwise.
set_download_boolean_flag() {
    download=1 # TRUE: assume a download is required
    if [[ -n $transcript_file && -n $description_file ]]; then
        download=0 # FALSE: both files are already present
    fi
}


# Fetch subtitles and the description with yt-dlp when $download is 1,
# then refresh the file lookup so the new files are picked up.
# Globals: download (read), url (read)
download_subs() {
    # Skip everything unless the download flag is set
    [ "$download" -eq 1 ] || return 0

    # One yt-dlp run per artifact: auto-generated subs, manual subs, description
    yt-dlp --restrict-filenames --write-auto-sub --skip-download "${url}"
    yt-dlp --restrict-filenames --sub-langs=eng --write-subs --skip-download "${url}"
    yt-dlp --restrict-filenames --write-description --skip-download "${url}"

    # The files exist only now, so look them up again
    search_for_downloaded_matching_files
}


read_and_format_transcript_file() {
    perl_removed_dupes="$(perl -0777 -pe 's/^\d\d.*\n.*\n.*<\/c>//gm' <"${transcript_file}")"
    local prefix="https://www.youtube.com/watch?v=${video_id}&t="
    local suffix="s"
    formated_transcript_file="$(awk -v pre="$prefix" -v suf="$suffix" '
    /^([0-9]{2}:){2}[0-9]{2}\.[0-9]{3}/ {
        split($1, a, /[:.]/);
        $1 = pre (int(a[1]*3600 + a[2]*60 + a[3]) - 3) suf;
        sub(/ --> [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3}/, "");
        sub(/ align:start position:0%$/, "");
        print;
        next;
    }
    {
        sub(/ align:start position:0%$/, "");
        print;
    }
    ' <<<"${perl_removed_dupes}")"
    #CRLF for ugrep to avoid ?bug? where before lines are not all outputted
    formated_transcript_file_CRLF=$(printf '%b' "$formated_transcript_file" | sed 's/$/\r/')
}


# Print the video description that yt-dlp saved, if one is available.
# Globals: description_file (read)
# Outputs: the description on stdout, or a notice on stderr when it is missing
echo_description_file() {
    if [[ -r ${description_file} ]]; then
        cat "${description_file}"
    else
        # Avoid running cat on an empty or missing path
        echo "No description file available" >&2
    fi
}


# Prompt for a regex and search the formatted transcript with every available
# tool: ugrep and ripgrep when installed, plain grep only when neither is.
# Globals: formated_transcript_file, formated_transcript_file_CRLF (read),
#          search_term (written)
# Outputs: search results on stdout
user_search() {
    # Local counter so a stray app_count in the environment cannot
    # suppress the grep fallback
    local app_count=0

    echo -e "\n\n"
    read -rp "Enter regex (read as raw input): " search_term

    if command -v ug >/dev/null 2>&1; then
        echo -e "\n\n\n\n"
        echo "Ugrep output"
        ug --pretty=never -B2 -A1 -i -Z+-~1 -e "${search_term}" --andnot "^https?:\/\/"  <<<"$formated_transcript_file_CRLF"
        app_count=$((app_count + 1))
    fi

    if command -v rg >/dev/null 2>&1; then
        echo -e "\n\n\n\n"
        echo "Ripgrep output"
        rg -iP -B2 -A7 "^(?!https?:\/\/).*\K${search_term}" <<<"$formated_transcript_file"
        app_count=$((app_count + 1))
    fi

    if [ "$app_count" -eq 0 ]; then
        echo -e "\n\n\n\n"
        echo "Grep output"
        # -e keeps a pattern that starts with "-" from being read as an option
        grep -iP -B2 -A1 -e "${search_term}" <<<"$formated_transcript_file"
        echo -e "\n\n"
        # Ending on echo (not an arithmetic command that evaluates to 0)
        # keeps the function's exit status at 0 on this path
        echo "Consider installing ripgrep and ugrep for better search"
    fi
}


# Script entry point: hand every command-line argument to main
main "$@"


    
View original on lemm.ee

Fast youtube download bash script using custom build of aria2

I made a script that downloads from YouTube very quickly using a custom aria2 build.

Aria2 https://github.com/P3TERX/Aria2-Pro-Core/releases

ffmpeg build https://github.com/yt-dlp/FFmpeg-Builds/releases

I chose ffmpeg-master-latest-linux64-gpl.tar.xz

#!/usr/bin/env bash
#set -x
# Download a video with yt-dlp into the current directory.
# YouTube URLs are normalised and fetched through aria2c with full metadata;
# every other URL uses yt-dlp's built-in downloader.
# Usage: script download_url

if [[ -z "$*" ]]; then
    # A usage error goes to stderr and must not report success
    echo "specify download url" >&2
    exit 1
fi

dir_dl="$PWD"
url="$*"

ffmpeg_dir="$HOME/.local/bin.notpath/"
download_archive_dir="$HOME/Videos/yt-dlp/"
download_archive_filename=".yt-dlp-archived-done.txt"

mkdir -p "$download_archive_dir"

youtube_match_regex='^.*(youtube[.]com|youtu[.]be|youtube-nocookie[.]com).*$'

if [[ "$1" =~ $youtube_match_regex ]]; then
    # Cut the URL down to the part that ends with the video/channel id.
    # The id character class must exclude only & ? % # / and newline; an
    # HTML-escaped "&amp;" there would also cut ids at a, m, p and ;.
    url="$(perl -pe 's/((?:http:|https:)*?\/\/(?:www\.|)(?:youtube\.com|m\.youtube\.com|youtu\.|#youtube-nocookie\.com).*(?:c(?:hannel)?\/|u(?:ser)?\/|v=|v%3D|v\/|(?:a|p)\/(?:a|u)\/\d.*\/|watch\?|vi(?:=|\/)|\/#embed\/|oembed\?|be\/|e\/)([^&?%#\/\n]+)).*/$1/gm' <<<"$*")"
    # NOTE(review): --check-certificate=false below turns off TLS certificate
    # verification in aria2c; confirm this is really wanted.
    yt-dlp \
    --check-formats \
    --clean-info-json \
    --download-archive "$download_archive_dir$download_archive_filename" \
    --embed-chapters \
    --embed-info-json \
    --embed-metadata \
    --embed-thumbnail \
    --external-downloader aria2c \
    --downloader-args \
    "aria2c: \
        --allow-piece-length-change=true \
        --check-certificate=false \
        --console-log-level=notice \
        --content-disposition-default-utf8=true \
        --continue=true \
        --disk-cache=8192 \
        --download-result=full \
        --enable-mmap \
        --file-allocation=falloc \
        --lowest-speed-limit=100K \
        --max-concurrent-downloads=16 \
        --max-connection-per-server=64 \
        --max-mmap-limit=8192M \
        --max-resume-failure-tries=5 \
        --max-file-not-found=2 \
        --max-tries=3 \
        --min-split-size=64K \
        --no-file-allocation-limit=8192M \
        --piece-length=64k \
        --realtime-chunk-checksum=false \
        --retry-on-400=true \
        --retry-on-403=true \
        --retry-on-406=true \
        --retry-on-unknown=true \
        --retry-wait=1 \
        --split=32 \
        --stream-piece-selector=geom \
        --summary-interval=0 " \
    --ffmpeg-location "$ffmpeg_dir" \
    --output "$dir_dl"'/%(channel)s/%(title)s_%(channel)s_%(upload_date>%Y-%m-%d)s_%(duration>%H-%M-%S)s_%(resolution)s.%(ext)s' \
    --prefer-free-formats \
    --remux-video mkv \
    --restrict-filenames \
    --sponsorblock-remove "filler,interaction,intro,music_offtopic,outro,preview,selfpromo,sponsor" \
    --sub-langs "en.*,live_chat" \
    --write-auto-subs \
    --write-description \
    --write-info-json \
    --write-playlist-metafiles \
    --write-subs \
    --write-thumbnail \
    "$url"
else
    # Non-YouTube sites: built-in downloader with chunked HTTP requests
    yt-dlp \
    --download-archive "$download_archive_dir$download_archive_filename" \
    --embed-chapters \
    --ffmpeg-location "$ffmpeg_dir" \
    --http-chunk-size 10M \
    --output "$dir_dl/%(title)s_%(duration>%H-%M-%S)s_%(upload_date>%Y-%m-%d)s_%(resolution)s_URL_(%(id)s).%(ext)s" \
    --prefer-free-formats \
    --restrict-filenames \
    "$url"
fi

View original on lemm.ee
mpv·MPV command line video playerbycmysmiaczxotoy

My MPV lua script isn't reading my script-opts file. Why?

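Edit: I don't know why, but explicitly passing the script-opts file's name to read_options made it work, even though the script and the config file have the same basename. (A likely cause: mpv replaces non-alphanumeric characters in a script's name with `_`, so the default identifier would be `demuxer_max_bytes` rather than `demuxer-max-bytes`.)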

Change:

From:

-- Define a table with default options
local o = {
    memory_usage_percentage = 80,
    reserved_memory_gb = 6
}

-- Read the options from the script-opts file.
-- No identifier is passed, so mpv chooses the config file name on its own
-- (NOTE(review): presumably derived from the script name; confirm in the mpv Lua docs)
options.read_options(o)

-- Print the read options for debugging purposes
msg.info("memory_usage_percentage: " .. o.memory_usage_percentage)
msg.info("reserved_memory_gb: " .. o.reserved_memory_gb)

To:

-- Define a table with default options
local opts = {
    memory_usage_percentage = 80,
    reserved_memory_gb = 6
}

-- Read the options from the script-opts file, passing the identifier
-- "demuxer-max-bytes" explicitly instead of relying on the default
(require 'mp.options').read_options(opts, "demuxer-max-bytes")

-- Print the read options for debugging purposes
msg.info("memory_usage_percentage: " .. opts.memory_usage_percentage)
msg.info("reserved_memory_gb: " .. opts.reserved_memory_gb)

Original post: I am writing a Lua script that adjusts the buffer size dynamically based on available RAM. The script works with the hardcoded values (memory_usage_percentage = 80, reserved_memory_gb = 6), but it does not read the new values from my script-opts file. Do you see why?

cat /home/cmysmiaczxotoy/.config/mpv/script-opts/demuxer-max-bytes.conf

memory_usage_percentage=70
reserved_memory_gb=8

cat /home/cmysmiaczxotoy/.config/mpv/scripts/demuxer-max-bytes.lua (Now edited with fix)

-- mpv modules used by this script
local mp = require 'mp'
local msg = require 'mp.msg'
local options = require 'mp.options'

-- Tunables and their built-in defaults
local opts = {
    memory_usage_percentage = 80,
    reserved_memory_gb = 6
}

-- Overlay the defaults with user settings stored under the
-- "demuxer-max-bytes" script-opts identifier
options.read_options(opts, "demuxer-max-bytes")

-- Log the effective values to make configuration problems easy to spot
msg.info("memory_usage_percentage: " .. opts.memory_usage_percentage)
msg.info("reserved_memory_gb: " .. opts.reserved_memory_gb)

-- Report whether Lua's configured directory separator is a backslash,
-- which this script treats as "running on Windows"
local function is_windows()
    local dir_separator = package.config:sub(1, 1)
    return dir_separator == '\\'
end

-- Derive a demuxer cache size from the current memory figures and apply it,
-- together with the related cache properties.
--   free_mem_kib:  currently available memory, in KiB
--   total_mem_kib: total memory, in KiB
local function set_memory_properties(free_mem_kib, total_mem_kib)
    -- Calculate the percentage of the total memory in KiB
    local allowed_mem_kib = total_mem_kib * (opts.memory_usage_percentage / 100)
    -- Convert reserved memory from GB to KiB
    local reserved_mem_kib = opts.reserved_memory_gb * 1024 * 1024
    -- Calculate the amount of memory to use, leaving the reserved memory free
    local mem_to_use_kib = allowed_mem_kib - (total_mem_kib - free_mem_kib - reserved_mem_kib)

    -- Never take more than what is free once the reserve is set aside
    if mem_to_use_kib > free_mem_kib - reserved_mem_kib then
        mem_to_use_kib = free_mem_kib - reserved_mem_kib
    end
    -- A negative budget falls back to "free minus reserve"
    if mem_to_use_kib < 0 then
        mem_to_use_kib = free_mem_kib - reserved_mem_kib
    end
    -- If that is negative as well, use a 1024 KiB budget
    if mem_to_use_kib < 0 then
        mem_to_use_kib = 1024
    end

    -- Convert to bytes and round down to an integer
    local mem_to_use_bytes = math.floor(mem_to_use_kib * 1024)

    -- Set demuxer-max-bytes to the calculated value
    mp.set_property("demuxer-max-bytes", tostring(mem_to_use_bytes))
    mp.msg.info("Set demuxer-max-bytes to: " .. mem_to_use_bytes .. " bytes")

    -- Set cache-related properties
    mp.set_property("cache", "yes")
    mp.set_property("cache-pause", "yes")
    mp.set_property("force-seekable", "yes")
    mp.set_property("demuxer-readahead-secs", "30")
end

-- Function to gather memory info on Windows.
-- Runs a PowerShell query and reads two lines from its output: free physical
-- memory first, then total visible memory.
-- Returns free, total as numbers; 0 stands in for any value that could not be
-- obtained, matching what the Linux variant returns on failure.
local function get_memory_info_windows()
    local ps_script = 'Get-CimInstance Win32_OperatingSystem | ' ..
                      'ForEach-Object { $_.FreePhysicalMemory, $_.TotalVisibleMemorySize }'
    local handle = io.popen('powershell -NoProfile -Command "' .. ps_script .. '"', 'r')
    if not handle then
        -- The process could not be started at all
        return 0, 0
    end
    local free_mem_kib = handle:read("*l")
    local total_mem_kib = handle:read("*l")
    handle:close()

    -- tonumber yields nil for missing or non-numeric lines; callers compare
    -- the result with numbers, so substitute 0
    return tonumber(free_mem_kib) or 0, tonumber(total_mem_kib) or 0
end

-- Read MemAvailable and MemTotal from /proc/meminfo.
-- Returns available, total; both stay 0 when the file cannot be opened
-- or the corresponding entry is not found.
local function get_memory_info_linux()
    local available_kib, total_kib = 0, 0

    local fh = io.open("/proc/meminfo", "r")
    if not fh then
        return available_kib, total_kib
    end

    for entry in fh:lines() do
        local name, amount = entry:match("(%w+):%s+(%d+)")
        if name == "MemTotal" then
            total_kib = tonumber(amount)
        elseif name == "MemAvailable" then
            available_kib = tonumber(amount)
        end
    end
    fh:close()

    return available_kib, total_kib
end

-- Recompute and apply the cache size every time the "start-file" event fires
mp.register_event("start-file", function()
    local free_mem_kib, total_mem_kib

    if is_windows() then
        free_mem_kib, total_mem_kib = get_memory_info_windows()
    else
        free_mem_kib, total_mem_kib = get_memory_info_linux()
    end

    -- Either value can be missing when the platform probe fails; comparing
    -- nil with a number raises an error, so check for presence first
    if free_mem_kib and total_mem_kib and total_mem_kib > 0 then
        set_memory_properties(free_mem_kib, total_mem_kib)
    else
        mp.msg.error("Could not determine total memory.")
    end
end)
View original on lemm.ee

You reached the end