Spyke

Bash script to download and search youtube subtitles and output clickable timestamped urls

cross-posted from: https://lemm.ee/post/23155648

Here is the script.


#!/usr/bin/env bash
# Download and search youtube subs
# deps: yt-dlp, awk, perl, sed, and at least one of ugrep, ripgrep, or grep
# usage: script youtube_url


# Entry point: run the whole download/format/search pipeline for one video.
# Arguments:       $@ - the video URL; all arguments are joined with spaces
#                       into one string, which check_if_url then validates
# Globals written: url
main() {
    # "$*" states the join-into-one-string explicitly; assigning "$@" to a
    # scalar performs the same join implicitly (ShellCheck SC2124).
    url="$*"
    check_if_url
    get_video_id
    search_for_downloaded_matching_files
    set_download_boolean_flag
    download_subs
    read_and_format_transcript_file
    echo_description_file
    user_search
}


# Validate the global `url`: it must be one https:// URL containing no
# whitespace. Otherwise report the problem on stderr and exit the script
# with status 1.
# Globals read: url
check_if_url() {
    local regex='^https://[^[:space:]]+$'
    if ! [[ $url =~ $regex ]]; then
        # Diagnostics go to stderr so they never mix with regular output.
        printf '%s\n' "Invalid input. Valid input is a url matching regex ${regex}" >&2
        exit 1
    fi
}


# Extract the video id from the global `url` into the global `video_id`.
# Recognised forms:
#   ...?v=ID / ...&v=ID             (watch URLs; the id ends at "&" or "#")
#   youtu.be/ID                     (short links)
#   /shorts/ID, /embed/ID, /live/ID (path-style URLs)
# Globals read:    url
# Globals written: video_id (empty when no id could be found)
# Returns: 0 when an id was found, 1 otherwise.
get_video_id() {
    # Patterns are kept in variables so [[ =~ ]] treats them as regexes.
    local watch_re='[?&]v=([^&#]+)'
    local short_re='youtu\.be/([^?&#/]+)'
    local path_re='/(shorts|embed|live)/([^?&#/]+)'

    video_id=""
    if [[ $url =~ $watch_re ]]; then
        video_id="${BASH_REMATCH[1]}"
    elif [[ $url =~ $short_re ]]; then
        video_id="${BASH_REMATCH[1]}"
    elif [[ $url =~ $path_re ]]; then
        video_id="${BASH_REMATCH[2]}"
    fi

    # An empty id would make later "*${video_id}*" globs match every file,
    # so signal the failure through the return status.
    [[ -n $video_id ]]
}


# Locate the most recently created transcript (.vtt) and description file in
# $PWD whose names contain the video id.
# Globals read:    video_id
# Globals written: transcript_file, description_file (empty when not found)
search_for_downloaded_matching_files() {
    local ext newest
    for ext in vtt description; do
        # Keep only the first entry of a creation-time-sorted listing;
        # listing errors (e.g. nothing matches) are discarded, which leaves
        # the result empty.
        newest="$(/usr/bin/ls -t --time=creation "$PWD"/*${video_id}*."${ext}" 2>/dev/null | head -n 1)"
        case "$ext" in
            vtt) transcript_file="$newest" ;;
            description) description_file="$newest" ;;
        esac
    done
}


# Decide whether the subtitle/description files still have to be fetched.
# Globals read:    transcript_file, description_file
# Globals written: download (1 = TRUE, fetch needed; 0 = FALSE, both found)
set_download_boolean_flag() {
    # Assume a download is needed unless both cached files are known.
    download=1
    if [[ -n "$transcript_file" && -n "$description_file" ]]; then
        download=0
    fi
}


# Fetch subtitles and the description with yt-dlp when the download flag is
# set, then rescan so transcript_file/description_file name the new files.
# Globals read: download, url
download_subs() {
    if [ "$download" -eq 1 ]; then
        # Automatically generated captions.
        yt-dlp --restrict-filenames --write-auto-sub --skip-download "${url}"
        # Uploaded (manual) subtitles. --sub-langs takes regexes matched
        # against language codes such as "en" or "en-US"; quoted so the
        # shell does not treat the "*" as a glob.
        yt-dlp --restrict-filenames --sub-langs='en.*' --write-subs --skip-download "${url}"
        yt-dlp --restrict-filenames --write-description --skip-download "${url}"
        # Search files again since they were just downloaded
        search_for_downloaded_matching_files
    fi
}


# Convert the downloaded WebVTT transcript into searchable text in which each
# cue timing line is replaced by a clickable, timestamped video URL.
# Globals read:    transcript_file, video_id
# Globals written: perl_removed_dupes, formated_transcript_file,
#                  formated_transcript_file_CRLF
# Returns: 1 (all outputs empty, message on stderr) when no readable
#          transcript file is available.
read_and_format_transcript_file() {
    perl_removed_dupes=""
    formated_transcript_file=""
    formated_transcript_file_CRLF=""
    if [[ -z $transcript_file || ! -r $transcript_file ]]; then
        printf '%s\n' "No readable transcript (.vtt) file found for video id '${video_id}'" >&2
        return 1
    fi

    # Whole-file substitution: delete every span made of a line starting with
    # two digits, the line after it, and the following line up to its last
    # "</c>" tag (presumably the word-timed duplicate cues of auto captions).
    perl_removed_dupes="$(perl -0777 -pe 's/^\d\d.*\n.*\n.*<\/c>//gm' <"${transcript_file}")"
    local prefix="https://www.youtube.com/watch?v=${video_id}&t="
    local suffix="s"
    # Digit runs are spelled out instead of using {n} intervals, which some
    # awk implementations do not support.
    formated_transcript_file="$(awk -v pre="$prefix" -v suf="$suffix" '
    # Cue timing line "HH:MM:SS.mmm --> HH:MM:SS.mmm [settings]"
    /^[0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.[0-9][0-9][0-9]/ {
        split($1, a, /[:.]/);
        # Link 3 seconds before the cue starts, but never before 0.
        secs = int(a[1]*3600 + a[2]*60 + a[3]) - 3;
        if (secs < 0) secs = 0;
        $1 = pre secs suf;
        sub(/ --> [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.[0-9][0-9][0-9]/, "");
        sub(/ align:start position:0%$/, "");
        print;
        next;
    }
    {
        sub(/ align:start position:0%$/, "");
        print;
    }
    ' <<<"${perl_removed_dupes}")"
    #CRLF for ugrep to avoid ?bug? where before lines are not all outputted
    # %s (not %b) so backslashes in the transcript text are kept verbatim.
    formated_transcript_file_CRLF=$(printf '%s' "$formated_transcript_file" | sed 's/$/\r/')
}


# Print the video description (the file named by the global
# `description_file`) to stdout.
# Globals read: description_file
# Returns: 1, with a message on stderr, when there is no readable file.
echo_description_file() {
    if [[ -z $description_file || ! -r $description_file ]]; then
        printf '%s\n' "No readable description file found" >&2
        return 1
    fi
    cat "${description_file}"
}


# Prompt for a regex and search the formatted transcript with every available
# tool: ugrep and/or ripgrep when installed, plain grep -P otherwise.
# Matches are only accepted on transcript text, never on the URL lines.
# Globals read:    formated_transcript_file, formated_transcript_file_CRLF
# Globals written: search_term, app_count (number of search tools run)
user_search() {
    echo -e "\n\n"
    read -rp "Enter regex (read as raw input): " search_term

    : "${app_count:=0}"

    # PCRE shared by ripgrep and grep: refuse lines that start with a URL and
    # report only the user's match (\K). The user regex is wrapped in a
    # non-capturing group so an alternation such as "a|b" cannot escape the
    # URL-exclusion prefix.
    local pcre_outside_urls="^(?!https?:\/\/).*\K(?:${search_term})"

    if command -v ug >/dev/null 2>&1; then
        echo -e "\n\n\n\n"
        echo "Ugrep output"
        ug --pretty=never -B2 -A1 -i -Z+-~1 -e "${search_term}" --andnot "^https?:\/\/"  <<<"$formated_transcript_file_CRLF"
        app_count=$((app_count + 1))
    fi

    if command -v rg >/dev/null 2>&1; then
        echo -e "\n\n\n\n"
        echo "Ripgrep output"
        rg -iP -B2 -A7 -e "$pcre_outside_urls" <<<"$formated_transcript_file"
        app_count=$((app_count + 1))
    fi

    # Fallback when neither ugrep nor ripgrep ran.
    if [ "$app_count" -eq 0 ]; then
        echo -e "\n\n\n\n"
        echo "Grep output"
        # -e keeps a pattern from ever being parsed as an option.
        grep -iP -B2 -A1 -e "$pcre_outside_urls" <<<"$formated_transcript_file"
        echo -e "\n\n"
        echo "Consider installing ripgrep and ugrep for better search"
        app_count=$((app_count + 1))
    fi
}


# Script entry point: hand every command-line argument to main.
main "$@"

View original on lemm.ee

Comments

bash · by cmysmiaczxotoy

Bash script to download and search youtube subtitles and output clickable timestamped urls

Here is the script.


#!/usr/bin/env bash
# Download and search youtube subs
# deps: yt-dlp, awk, perl, sed, and at least one of ugrep, ripgrep, or grep
# usage: script youtube_url


# Entry point: run the whole download/format/search pipeline for one video.
# Arguments:       $@ - the video URL; all arguments are joined with spaces
#                       into one string, which check_if_url then validates
# Globals written: url
main() {
    # "$*" states the join-into-one-string explicitly; assigning "$@" to a
    # scalar performs the same join implicitly (ShellCheck SC2124).
    url="$*"
    check_if_url
    get_video_id
    search_for_downloaded_matching_files
    set_download_boolean_flag
    download_subs
    read_and_format_transcript_file
    echo_description_file
    user_search
}


# Validate the global `url`: it must be one https:// URL containing no
# whitespace. Otherwise report the problem on stderr and exit the script
# with status 1.
# Globals read: url
check_if_url() {
    local regex='^https://[^[:space:]]+$'
    if ! [[ $url =~ $regex ]]; then
        # Diagnostics go to stderr so they never mix with regular output.
        printf '%s\n' "Invalid input. Valid input is a url matching regex ${regex}" >&2
        exit 1
    fi
}


# Extract the video id from the global `url` into the global `video_id`.
# Recognised forms:
#   ...?v=ID / ...&v=ID             (watch URLs; the id ends at "&" or "#")
#   youtu.be/ID                     (short links)
#   /shorts/ID, /embed/ID, /live/ID (path-style URLs)
# Globals read:    url
# Globals written: video_id (empty when no id could be found)
# Returns: 0 when an id was found, 1 otherwise.
get_video_id() {
    # Patterns are kept in variables so [[ =~ ]] treats them as regexes.
    local watch_re='[?&]v=([^&#]+)'
    local short_re='youtu\.be/([^?&#/]+)'
    local path_re='/(shorts|embed|live)/([^?&#/]+)'

    video_id=""
    if [[ $url =~ $watch_re ]]; then
        video_id="${BASH_REMATCH[1]}"
    elif [[ $url =~ $short_re ]]; then
        video_id="${BASH_REMATCH[1]}"
    elif [[ $url =~ $path_re ]]; then
        video_id="${BASH_REMATCH[2]}"
    fi

    # An empty id would make later "*${video_id}*" globs match every file,
    # so signal the failure through the return status.
    [[ -n $video_id ]]
}


# Locate the most recently created transcript (.vtt) and description file in
# $PWD whose names contain the video id.
# Globals read:    video_id
# Globals written: transcript_file, description_file (empty when not found)
search_for_downloaded_matching_files() {
    local ext newest
    for ext in vtt description; do
        # Keep only the first entry of a creation-time-sorted listing;
        # listing errors (e.g. nothing matches) are discarded, which leaves
        # the result empty.
        newest="$(/usr/bin/ls -t --time=creation "$PWD"/*${video_id}*."${ext}" 2>/dev/null | head -n 1)"
        case "$ext" in
            vtt) transcript_file="$newest" ;;
            description) description_file="$newest" ;;
        esac
    done
}


# Decide whether the subtitle/description files still have to be fetched.
# Globals read:    transcript_file, description_file
# Globals written: download (1 = TRUE, fetch needed; 0 = FALSE, both found)
set_download_boolean_flag() {
    # Assume a download is needed unless both cached files are known.
    download=1
    if [[ -n "$transcript_file" && -n "$description_file" ]]; then
        download=0
    fi
}


# Fetch subtitles and the description with yt-dlp when the download flag is
# set, then rescan so transcript_file/description_file name the new files.
# Globals read: download, url
download_subs() {
    if [ "$download" -eq 1 ]; then
        # Automatically generated captions.
        yt-dlp --restrict-filenames --write-auto-sub --skip-download "${url}"
        # Uploaded (manual) subtitles. --sub-langs takes regexes matched
        # against language codes such as "en" or "en-US"; quoted so the
        # shell does not treat the "*" as a glob.
        yt-dlp --restrict-filenames --sub-langs='en.*' --write-subs --skip-download "${url}"
        yt-dlp --restrict-filenames --write-description --skip-download "${url}"
        # Search files again since they were just downloaded
        search_for_downloaded_matching_files
    fi
}


# Convert the downloaded WebVTT transcript into searchable text in which each
# cue timing line is replaced by a clickable, timestamped video URL.
# Globals read:    transcript_file, video_id
# Globals written: perl_removed_dupes, formated_transcript_file,
#                  formated_transcript_file_CRLF
# Returns: 1 (all outputs empty, message on stderr) when no readable
#          transcript file is available.
read_and_format_transcript_file() {
    perl_removed_dupes=""
    formated_transcript_file=""
    formated_transcript_file_CRLF=""
    if [[ -z $transcript_file || ! -r $transcript_file ]]; then
        printf '%s\n' "No readable transcript (.vtt) file found for video id '${video_id}'" >&2
        return 1
    fi

    # Whole-file substitution: delete every span made of a line starting with
    # two digits, the line after it, and the following line up to its last
    # "</c>" tag (presumably the word-timed duplicate cues of auto captions).
    perl_removed_dupes="$(perl -0777 -pe 's/^\d\d.*\n.*\n.*<\/c>//gm' <"${transcript_file}")"
    local prefix="https://www.youtube.com/watch?v=${video_id}&t="
    local suffix="s"
    # Digit runs are spelled out instead of using {n} intervals, which some
    # awk implementations do not support.
    formated_transcript_file="$(awk -v pre="$prefix" -v suf="$suffix" '
    # Cue timing line "HH:MM:SS.mmm --> HH:MM:SS.mmm [settings]"
    /^[0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.[0-9][0-9][0-9]/ {
        split($1, a, /[:.]/);
        # Link 3 seconds before the cue starts, but never before 0.
        secs = int(a[1]*3600 + a[2]*60 + a[3]) - 3;
        if (secs < 0) secs = 0;
        $1 = pre secs suf;
        sub(/ --> [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.[0-9][0-9][0-9]/, "");
        sub(/ align:start position:0%$/, "");
        print;
        next;
    }
    {
        sub(/ align:start position:0%$/, "");
        print;
    }
    ' <<<"${perl_removed_dupes}")"
    #CRLF for ugrep to avoid ?bug? where before lines are not all outputted
    # %s (not %b) so backslashes in the transcript text are kept verbatim.
    formated_transcript_file_CRLF=$(printf '%s' "$formated_transcript_file" | sed 's/$/\r/')
}


# Print the video description (the file named by the global
# `description_file`) to stdout.
# Globals read: description_file
# Returns: 1, with a message on stderr, when there is no readable file.
echo_description_file() {
    if [[ -z $description_file || ! -r $description_file ]]; then
        printf '%s\n' "No readable description file found" >&2
        return 1
    fi
    cat "${description_file}"
}


# Prompt for a regex and search the formatted transcript with every available
# tool: ugrep and/or ripgrep when installed, plain grep -P otherwise.
# Matches are only accepted on transcript text, never on the URL lines.
# Globals read:    formated_transcript_file, formated_transcript_file_CRLF
# Globals written: search_term, app_count (number of search tools run)
user_search() {
    echo -e "\n\n"
    read -rp "Enter regex (read as raw input): " search_term

    : "${app_count:=0}"

    # PCRE shared by ripgrep and grep: refuse lines that start with a URL and
    # report only the user's match (\K). The user regex is wrapped in a
    # non-capturing group so an alternation such as "a|b" cannot escape the
    # URL-exclusion prefix.
    local pcre_outside_urls="^(?!https?:\/\/).*\K(?:${search_term})"

    if command -v ug >/dev/null 2>&1; then
        echo -e "\n\n\n\n"
        echo "Ugrep output"
        ug --pretty=never -B2 -A1 -i -Z+-~1 -e "${search_term}" --andnot "^https?:\/\/"  <<<"$formated_transcript_file_CRLF"
        app_count=$((app_count + 1))
    fi

    if command -v rg >/dev/null 2>&1; then
        echo -e "\n\n\n\n"
        echo "Ripgrep output"
        rg -iP -B2 -A7 -e "$pcre_outside_urls" <<<"$formated_transcript_file"
        app_count=$((app_count + 1))
    fi

    # Fallback when neither ugrep nor ripgrep ran.
    if [ "$app_count" -eq 0 ]; then
        echo -e "\n\n\n\n"
        echo "Grep output"
        # -e keeps a pattern from ever being parsed as an option.
        grep -iP -B2 -A1 -e "$pcre_outside_urls" <<<"$formated_transcript_file"
        echo -e "\n\n"
        echo "Consider installing ripgrep and ugrep for better search"
        app_count=$((app_count + 1))
    fi
}


# Script entry point: hand every command-line argument to main.
main "$@"

View original on lemm.ee

Comments

bash · Bash · by cmysmiaczxotoy

Fast youtube download bash script using custom build of aria2

I made a script that downloads from youtube super fast using a custom aria2 build.

Aria2 https://github.com/P3TERX/Aria2-Pro-Core/releases

ffmpeg build https://github.com/yt-dlp/FFmpeg-Builds/releases

I chose ffmpeg-master-latest-linux64-gpl.tar.xz

#!/usr/bin/env bash
#set -x

# A download URL is required; its absence is an error, so report it on
# stderr and exit with a non-zero status.
if [[ -z "$*" ]]; then
    echo "specify download url" >&2
    exit 1
fi

# Downloads are written beneath the directory the script was started from.
dir_dl="$PWD"
# All arguments joined into one string ("$*" makes the join explicit).
url="$*"

# Directory handed to yt-dlp through --ffmpeg-location.
ffmpeg_dir="$HOME/.local/bin.notpath/"
# Directory and file name of the yt-dlp --download-archive file.
download_archive_dir="$HOME/Videos/yt-dlp/"
download_archive_filename=".yt-dlp-archived-done.txt"

mkdir -p "$download_archive_dir"

# URLs on these domains take the aria2c-accelerated YouTube branch.
youtube_match_regex='^.*(youtube[.]com|youtu[.]be|youtube-nocookie[.]com).*$'

# Reduce a YouTube URL to the part that ends at the video/channel/user id,
# dropping trailing parameters such as "&list=..." or "?t=...".
# Arguments: $1 - URL text
# Outputs:   the shortened URL on stdout; text the pattern does not match is
#            printed unchanged
normalize_youtube_url() {
    # The id character class must exclude only the delimiters & ? % # / and
    # newline. An HTML-escaped "&amp;" inside the class would additionally
    # exclude the letters a, m, p and ";", cutting ids that contain them.
    # NOTE(review): the "#" in front of "youtube-nocookie" and "embed" keeps
    # those two alternatives from matching real URLs - confirm the intent.
    printf '%s\n' "$1" | perl -pe 's/((?:http:|https:)*?\/\/(?:www\.|)(?:youtube\.com|m\.youtube\.com|youtu\.|#youtube-nocookie\.com).*(?:c(?:hannel)?\/|u(?:ser)?\/|v=|v%3D|v\/|(?:a|p)\/(?:a|u)\/\d.*\/|watch\?|vi(?:=|\/)|\/#embed\/|oembed\?|be\/|e\/)([^&?%#\/\n]+)).*/$1/gm'
}

# YouTube URLs: normalise the URL, then download through aria2c with
# metadata, thumbnails, subtitles and SponsorBlock removal.
# Anything else: plain yt-dlp download with chunked HTTP requests.
if [[ "$1" =~ $youtube_match_regex ]]; then
    url="$(normalize_youtube_url "$*")"
    yt-dlp \
    --check-formats \
    --clean-info-json \
    --download-archive "$download_archive_dir$download_archive_filename" \
    --embed-chapters \
    --embed-info-json \
    --embed-metadata \
    --embed-thumbnail \
    --external-downloader aria2c \
    --downloader-args \
    "aria2c: \
        --allow-piece-length-change=true \
        --check-certificate=false \
        --console-log-level=notice \
        --content-disposition-default-utf8=true \
        --continue=true \
        --disk-cache=8192 \
        --download-result=full \
        --enable-mmap \
        --file-allocation=falloc \
        --lowest-speed-limit=100K \
        --max-concurrent-downloads=16 \
        --max-connection-per-server=64 \
        --max-mmap-limit=8192M \
        --max-resume-failure-tries=5 \
        --max-file-not-found=2 \
        --max-tries=3 \
        --min-split-size=64K \
        --no-file-allocation-limit=8192M \
        --piece-length=64k \
        --realtime-chunk-checksum=false \
        --retry-on-400=true \
        --retry-on-403=true \
        --retry-on-406=true \
        --retry-on-unknown=true \
        --retry-wait=1 \
        --split=32 \
        --stream-piece-selector=geom \
        --summary-interval=0 " \
    --ffmpeg-location "$ffmpeg_dir" \
    --output "$dir_dl"'/%(channel)s/%(title)s_%(channel)s_%(upload_date>%Y-%m-%d)s_%(duration>%H-%M-%S)s_%(resolution)s.%(ext)s' \
    --prefer-free-formats \
    --remux-video mkv \
    --restrict-filenames \
    --sponsorblock-remove "filler,interaction,intro,music_offtopic,outro,preview,selfpromo,sponsor" \
    --sub-langs "en.*,live_chat" \
    --write-auto-subs \
    --write-description \
    --write-info-json \
    --write-playlist-metafiles \
    --write-subs \
    --write-thumbnail \
    "$url"
else
    yt-dlp \
    --download-archive "$download_archive_dir$download_archive_filename" \
    --embed-chapters \
    --ffmpeg-location "$ffmpeg_dir" \
    --http-chunk-size 10M \
    --output "$dir_dl/%(title)s_%(duration>%H-%M-%S)s_%(upload_date>%Y-%m-%d)s_%(resolution)s_URL_(%(id)s).%(ext)s" \
    --prefer-free-formats \
    --restrict-filenames \
    "$url"
fi

View original on lemm.ee

Comments

mpv · MPV command line video player · by cmysmiaczxotoy

My MPV lua script isn't reading my script-opts file. Why?

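Edit: I don't know why, but specifying the filename of the script-opts file made it work even though the basenames are the same. (Likely cause: when no identifier is passed, read_options falls back to the script name reported by mp.get_script_name(), in which non-alphanumeric characters such as "-" are replaced with "_", so mpv looked for demuxer_max_bytes.conf rather than demuxer-max-bytes.conf.)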

Change:

From:

-- Define a table with default options
local o = {
    memory_usage_percentage = 80,
    reserved_memory_gb = 6
}

-- Read the options from the script-opts file.
-- No identifier argument is passed here; this is the variant that did not
-- pick up the values from the script-opts file.
options.read_options(o)

-- Print the read options for debugging purposes
msg.info("memory_usage_percentage: " .. o.memory_usage_percentage)
msg.info("reserved_memory_gb: " .. o.reserved_memory_gb)

To:

-- Define a table with default options
local opts = {
    memory_usage_percentage = 80,
    reserved_memory_gb = 6
}

-- Read the options from the script-opts file.
-- The second argument names the options file explicitly
-- (script-opts/demuxer-max-bytes.conf).
(require 'mp.options').read_options(opts, "demuxer-max-bytes")

-- Print the read options for debugging purposes
msg.info("memory_usage_percentage: " .. opts.memory_usage_percentage)
msg.info("reserved_memory_gb: " .. opts.reserved_memory_gb)

Original post: I am making a Lua script to adjust the buffer size dynamically based on available RAM. The script works with the hardcoded values (memory_usage_percentage = 80, reserved_memory_gb = 6), but it is not reading the new values from my script-opts file. Do you see why?

cat /home/cmysmiaczxotoy/.config/mpv/script-opts/demuxer-max-bytes.conf

memory_usage_percentage=70
reserved_memory_gb=8

cat /home/cmysmiaczxotoy/.config/mpv/scripts/demuxer-max-bytes.lua (Now edited with fix)

-- mpv scripting modules used by this script
local mp = require 'mp'
local msg = require 'mp.msg'
local options = require 'mp.options'

-- Built-in defaults; read_options below may replace them with values from
-- the "demuxer-max-bytes" script-opts file
local opts = {}
opts.memory_usage_percentage = 80
opts.reserved_memory_gb = 6

-- Load user overrides, naming the options file explicitly
options.read_options(opts, "demuxer-max-bytes")

-- Log the effective values to help debug option loading
msg.info("memory_usage_percentage: " .. opts.memory_usage_percentage)
msg.info("reserved_memory_gb: " .. opts.reserved_memory_gb)

-- Report whether the host is Windows, judged by the first character of
-- package.config being a backslash.
local function is_windows()
    local dir_separator = string.sub(package.config, 1, 1)
    if dir_separator == '\\' then
        return true
    end
    return false
end

-- Derive a demuxer cache size from the current memory figures and apply it,
-- together with fixed cache-related properties, to mpv.
--   free_mem_kib:  currently available memory, in KiB
--   total_mem_kib: total memory, in KiB
-- Reads opts.memory_usage_percentage and opts.reserved_memory_gb.
local function set_memory_properties(free_mem_kib, total_mem_kib)
    -- Calculate the percentage of the total memory in KiB
    local allowed_mem_kib = total_mem_kib * (opts.memory_usage_percentage / 100)
    -- Convert reserved memory from GB to KiB
    local reserved_mem_kib = opts.reserved_memory_gb * 1024 * 1024
    -- Calculate the amount of memory to use, leaving the reserved memory free
    local mem_to_use_kib = allowed_mem_kib - (total_mem_kib - free_mem_kib - reserved_mem_kib)

    -- Never plan for more than what is free once the reserve is set aside
    if mem_to_use_kib > free_mem_kib - reserved_mem_kib then
        mem_to_use_kib = free_mem_kib - reserved_mem_kib
    end
    -- A negative budget falls back to "free minus reserve" ...
    if mem_to_use_kib < 0 then
        mem_to_use_kib = free_mem_kib - reserved_mem_kib
    end
    -- ... and if that is negative too, to a 1024 KiB minimum
    if mem_to_use_kib < 0 then
        mem_to_use_kib = 1024
    end

    -- Convert to bytes, rounding down to a whole number
    local mem_to_use_bytes = math.floor(mem_to_use_kib * 1024)

    -- Set demuxer-max-bytes to the calculated value
    mp.set_property("demuxer-max-bytes", tostring(mem_to_use_bytes))
    mp.msg.info("Set demuxer-max-bytes to: " .. mem_to_use_bytes .. " bytes")

    -- Set cache-related properties
    mp.set_property("cache", "yes")
    mp.set_property("cache-pause", "yes")
    mp.set_property("force-seekable", "yes")
    mp.set_property("demuxer-readahead-secs", "30")
end

-- Gather memory info on Windows by running a PowerShell query that prints
-- FreePhysicalMemory and TotalVisibleMemorySize on two consecutive lines.
-- Returns free, total as numbers; a value that could not be obtained
-- (PowerShell failed to start, or the line is missing or not numeric) is
-- returned as 0, matching the Linux variant's defaults.
local function get_memory_info_windows()
    local ps_script = 'Get-CimInstance Win32_OperatingSystem | ' ..
                      'ForEach-Object { $_.FreePhysicalMemory, $_.TotalVisibleMemorySize }'
    local handle = io.popen('powershell -NoProfile -Command "' .. ps_script .. '"', 'r')
    if not handle then
        return 0, 0
    end
    local free_mem_kib = handle:read("*l")
    local total_mem_kib = handle:read("*l")
    handle:close()

    -- tonumber yields nil for missing or non-numeric lines; use 0 instead so
    -- callers can compare the results safely
    return tonumber(free_mem_kib) or 0, tonumber(total_mem_kib) or 0
end

-- Gather memory info on Linux from /proc/meminfo.
-- Returns the MemAvailable and MemTotal values (in that order); a value
-- stays 0 when its entry is absent or the file cannot be opened.
local function get_memory_info_linux()
    local wanted = { MemAvailable = 0, MemTotal = 0 }

    local fh = io.open("/proc/meminfo", "r")
    if not fh then
        return wanted.MemAvailable, wanted.MemTotal
    end

    for entry in fh:lines() do
        -- Each entry of interest looks like "Name:   <digits> ..."
        local name, amount = entry:match("(%w+):%s+(%d+)")
        if name ~= nil and wanted[name] ~= nil then
            wanted[name] = tonumber(amount)
        end
    end
    fh:close()

    return wanted.MemAvailable, wanted.MemTotal
end

-- Event handler: each time a file starts loading, query the current memory
-- figures and re-apply the cache properties derived from them.
mp.register_event("start-file", function()
    local free_mem_kib, total_mem_kib

    if is_windows() then
        free_mem_kib, total_mem_kib = get_memory_info_windows()
    else
        free_mem_kib, total_mem_kib = get_memory_info_linux()
    end

    -- Check for nil before comparing: a platform query that produced no
    -- usable number must end in the error message, not in a runtime error
    if free_mem_kib and total_mem_kib and total_mem_kib > 0 then
        set_memory_properties(free_mem_kib, total_mem_kib)
    else
        mp.msg.error("Could not determine total memory.")
    end
end)

View original on lemm.ee

Comments

datahoarder · by cmysmiaczxotoy

15.36TB SSD SAMSUNG PM1633A SAS How do I connect it?

I bought a 15.36TB SSD SAMSUNG PM1633A SAS MZ-ILS15TA DELL EMC MZ1LS15THMLS-000D4

I am trying to figure out what to buy in order to connect it to my desktop PC via PCIe. Is this a viable or recommended solution?

SFF-8643 to SFF-8639 cable

Dell LSI 9311-8i 8-port Internal 12G SAS PCIe x8 Host Bus RAID Adapter 3YDX4

View original on lemm.ee

Comments: 11

Posts

Bash script to download and search youtube subtitles and output clickable timestamped urls

Bash script to download and search youtube subtitles and output clickable timestamped urls

Fast youtube download bash script using custom build of aria2

My MPV lua script isn't reading my script-opts file. Why?

15.36TB SSD SAMSUNG PM1633A SAS How do I connect it?