Thursday, June 10, 2021

How to download WWDC2021 videos and subtitles

The scripts for previous WWDC are here https://iphonesdkdev.blogspot.com/2020/10/how-to-download-wwdc2020-videos-and.html

https://iphonesdkdev.blogspot.com/2017/07/how-to-fetch-wwdc-2017-video-subtitle.html

Create and run this script, wwdc2021_fetch_en.srt.sh, to fetch the WWDC2021 English subtitles


wwdc2021_fetch_en.srt.sh    Select all
#!/bin/bash
# @Last Modified by: javacom
# @Last Modified time: 2021-06-10
#
# Fetches the English subtitles of every WWDC session linked from the session
# index page into ./wwdc<year>/.
# For each session, one empty <mp4 basename>.en.srt file is created per mp4
# link found on the session page, and the raw WebVTT segments of the English
# subtitle playlist are appended to each of them. The files still contain
# WebVTT markup afterwards; a separate formatting pass turns them into SRT.
#
# Requires: curl, grep, sed, sort, cut, basename, dirname.

WWDC_YEAR=2021
WWDC_SESSION_PREFIX="https://developer.apple.com/videos/play/wwdc${WWDC_YEAR}"
WWDC_LOCAL_DIR=$(basename "$WWDC_SESSION_PREFIX")

#######################################
# Finds the HLS master playlist of a session and creates (truncates) the local
# subtitle files for it.
# Globals:   WWDC_SESSION_PREFIX, WWDC_LOCAL_DIR (read), SESSION_ID (fallback)
# Arguments: $1 - session id (defaults to $SESSION_ID)
# Outputs:   the .m3u8 URL on stdout (empty when none is found);
#            progress messages on stderr
#######################################
detect_video_m3u8 () {
    local session_id=${1:-$SESSION_ID}
    local session_url="$WWDC_SESSION_PREFIX/$session_id/"
    local session_html video_url mp4_url mp4_filename srt_filename

    session_html=$(curl -s "$session_url")

    # sed -n ... p: only lines that really contain a quoted https URL survive,
    # so an unexpected HTML line is never mistaken for the playlist URL.
    video_url=$(printf '%s\n' "$session_html" \
        | grep -F '.m3u8' \
        | grep -F -- "$session_id" \
        | sed -n "s#.*\"\(https://.*m3u8\)\".*#\1#p" \
        | head -n1)

    printf '%s\n' "$session_html" \
        | grep -F '.mp4' \
        | grep -F -- "$session_id" \
        | sed -n "s#.*\"\(https://.*mp4\).*\".*#\1#p" \
        | while read -r mp4_url; do
            mp4_filename=$(basename "$mp4_url" | cut -d. -f1)
            srt_filename="$mp4_filename.en.srt"
            # srt_filename="$mp4_filename.zh.srt"
            echo "> Subtitle local: $WWDC_LOCAL_DIR/$srt_filename" >&2
            # Start from an empty file; segments are appended later.
            : > "$WWDC_LOCAL_DIR/$srt_filename"
        done

    echo "$video_url"
    echo "> Video: $video_url" >&2
}

#######################################
# Resolves the English subtitle playlist referenced by a master playlist.
# Arguments: $1 - URL of the master .m3u8 playlist
# Outputs:   absolute subtitle playlist URL on stdout; progress on stderr
# Returns:   1 (printing nothing on stdout) when $1 is empty or the playlist
#            lists no English subtitle track
#######################################
detect_subtitle_m3u8 () {
    local video_url=$1
    local subtitle_uri subtitle_url

    [[ -n "$video_url" ]] || return 1

    # en subtitle
    subtitle_uri=$(curl -s "$video_url" \
        | grep "LANGUAGE=\"eng\"" \
        | grep "TYPE=SUBTITLES" \
        | sed "s#.*URI=\"\(.*\)\"#\1#" \
        | sed "s#.FORCED=NO##" \
        | head -n1)
    # zh subtitle
    # subtitle_uri=$(curl -s "$video_url" | grep "LANGUAGE=\"zh\"" | sed "s#.*URI=\"\(.*\)\"#\1#")

    [[ -n "$subtitle_uri" ]] || return 1

    subtitle_url=$subtitle_uri
    # A relative URI is resolved against the directory of the master playlist.
    if [[ "$subtitle_uri" != http* ]]; then
        subtitle_url="$(dirname "$video_url")/$subtitle_uri"
    fi

    echo "$subtitle_url"
    echo "> Subtitle: $subtitle_url" >&2
}

#######################################
# Downloads every WebVTT segment of a subtitle playlist and appends it to all
# local subtitle files of the session.
# Globals:   WWDC_LOCAL_DIR, WWDC_YEAR (read), SESSION_ID (fallback)
# Arguments: $1 - subtitle playlist URL
#            $2 - session id (optional, defaults to $SESSION_ID)
# Returns:   1 when $1 is empty
#######################################
download_subtitle_contents () {
    local subtitle_url=$1
    local session_id=${2:-$SESSION_ID}
    local subtitle_base_url webvtt subtitle_content srt_file

    [[ -n "$subtitle_url" ]] || return 1

    echo "> Downloading... "
    subtitle_base_url=$(dirname "$subtitle_url")
    curl -s "$subtitle_url" | grep "webvtt" | while read -r webvtt; do
        subtitle_content=$(curl -s "$subtitle_base_url/$webvtt")
        # en subtitle (use *.zh.srt here for the zh variant)
        for srt_file in "$WWDC_LOCAL_DIR/wwdc$WWDC_YEAR-$session_id"*.en.srt; do
            # An unmatched glob stays literal; skip it instead of creating it.
            [[ -e "$srt_file" ]] || continue
            echo "$subtitle_content" >> "$srt_file"
        done
    done
}

#######################################
# Walks every session id found on the index page and downloads its subtitles.
# Sessions without a video playlist or an English subtitle track are skipped.
#######################################
main () {
    mkdir -p "$WWDC_LOCAL_DIR" || return 1

    # Year 2020/2021: session ids have 3 to 5 digits, hence {3,5}.
    curl -s "$WWDC_SESSION_PREFIX" \
        | grep "/videos/play/wwdc$WWDC_YEAR" \
        | sed -n "s#.*/videos/play/wwdc$WWDC_YEAR/\([0-9]\{3,5\}\).*#\1#p" \
        | sort -u \
        | while read -r SESSION_ID; do
            local video_url subtitle_url
            video_url=$(detect_video_m3u8 "$SESSION_ID")
            if [[ -z "$video_url" ]]; then
                echo "> No video playlist for session $SESSION_ID, skipping" >&2
                continue
            fi
            if ! subtitle_url=$(detect_subtitle_m3u8 "$video_url"); then
                echo "> No English subtitle for session $SESSION_ID, skipping" >&2
                continue
            fi
            download_subtitle_contents "$subtitle_url" "$SESSION_ID"
        done
}

main




Run this shell script to format as SRT subtitle

shellscript.sh    Select all
# Converts the downloaded raw subtitle files (concatenated WebVTT segments)
# in ./wwdc<year>/ into numbered SRT files, written to ./wwdc<year>/sd/ and
# ./wwdc<year>/hd/ under the same file names.
WWDC_YEAR=2021

#######################################
# Filter: concatenated WebVTT segments on stdin -> SRT on stdout.
# Stages, in order:
#   1. drop WEBVTT / X-TIMESTAMP header lines and "align:middle line..." cue
#      settings;
#   2. drop every cue whose timestamp line was already seen (the timestamp
#      line and everything up to the next timestamp line);
#   3. join each blank-line separated cue into one line, using "-Z" as a
#      stand-in for the newline;
#   4. drop leftover lines starting with WEB, collapse adjacent duplicates;
#   5. prefix each cue with its running number;
#   6. expand "-Z" back to newlines, one blank line between cues;
#   7. strip a trailing " A:middle" setting, decode &gt; / &lt;, and delete
#      the two empty lines produced at the top by stage 5.
# The timestamp pattern spells out [0-9][0-9] instead of [0-9]{2} because not
# every awk implementation supports interval expressions.
#######################################
webvtt_to_srt () {
    sed -e '/WEBVTT/d;/X-TIMESTAMP/d;s/align.middle line.*$//;' \
        | awk '/^[0-9][0-9]:[0-9][0-9]:/ {seen[$0]++; skipduplicated=0} {if (seen[$0]>1) skipduplicated=1; if (!skipduplicated) print $0}' \
        | awk -v RS="" '{gsub("\n", "-Z"); print}' \
        | awk '$0 !~/^WEB/ {print $0}' \
        | uniq \
        | awk '{printf "\n%s-Z%s", NR,$0 }' \
        | awk -v ORS="\n\n" '{gsub("-Z", "\n"); print}' \
        | sed -e 's/.A:middle$//g;s/&gt;/>/g;s/&lt;/</g;1,2d;'
}

#######################################
# Formats every *_sd.??.srt and *_hd.??.srt file of the download directory.
# Returns: 1 when the download directory or the output directories are
#          unavailable, so nothing is processed in the wrong place.
#######################################
main () {
    local quality i

    cd "wwdc${WWDC_YEAR}" || {
        echo "cannot enter wwdc${WWDC_YEAR}" >&2
        return 1
    }
    mkdir -p sd hd || return 1

    for quality in sd hd; do
        for i in *_"${quality}".??.srt; do
            # An unmatched glob stays literal; skip it.
            [ -e "$i" ] || continue
            webvtt_to_srt < "$i" > "${quality}/$i"
        done
    done
}

main "$@"




Run this script wwdc2021_fetch_mp4.sh to download all mp4 (HD and SD) videos (also works for 2021)

wwdc2021_fetch_mp4.sh    Select all
#!/bin/bash
# @Last Modified by: javacom
# @Last Modified time: 2021-06-10
#
# Downloads every mp4 linked from each WWDC session page into ./wwdc<year>/.
# Files that already exist locally are not downloaded again; a hint for
# resuming a broken download is printed instead.
#
# Requires: curl, grep, sed, sort, basename.

WWDC_YEAR=2021
WWDC_SESSION_PREFIX="https://developer.apple.com/videos/play/wwdc${WWDC_YEAR}"
WWDC_LOCAL_DIR=$(basename "$WWDC_SESSION_PREFIX")

#######################################
# Downloads all mp4 files linked from one session page.
# Globals:   WWDC_SESSION_PREFIX, WWDC_LOCAL_DIR (read), SESSION_ID (fallback)
# Arguments: $1 - session id (defaults to $SESSION_ID)
# Outputs:   progress messages on stderr
#######################################
download_mp4_video () {
    local session_id=${1:-$SESSION_ID}
    local session_url="$WWDC_SESSION_PREFIX/$session_id/"
    local session_html mp4_url mp4_filename target

    session_html=$(curl -s "$session_url")

    # sed -n ... p: only lines that really contain a quoted https URL survive.
    printf '%s\n' "$session_html" \
        | grep -F '.mp4' \
        | grep -F -- "$session_id" \
        | sed -n "s#.*\"\(https://.*mp4\).*\".*#\1#p" \
        | while read -r mp4_url; do
            mp4_filename=$(basename "$mp4_url")
            target="$WWDC_LOCAL_DIR/$mp4_filename"
            if [ -e "$target" ]; then
                echo "> MP4 already existed : $target" >&2
                echo "> To resume broken download use curl -C - --connect-timeout 1200 -o $target $mp4_url" >&2
                echo " " >&2
            else
                echo "> MP4 Downloading... : $mp4_url" >&2
                curl --connect-timeout 120 -o "$target" "$mp4_url"
            fi
        done
}

#######################################
# Walks every session id found on the index page and downloads its videos.
#######################################
main () {
    mkdir -p "$WWDC_LOCAL_DIR" || return 1

    # Year 2020/2021: session ids have 3 to 5 digits, hence {3,5}.
    curl -s "$WWDC_SESSION_PREFIX" \
        | grep "/videos/play/wwdc$WWDC_YEAR" \
        | sed -n "s#.*/videos/play/wwdc$WWDC_YEAR/\([0-9]\{3,5\}\).*#\1#p" \
        | sort -u \
        | while read -r SESSION_ID; do
            download_mp4_video "$SESSION_ID"
        done
}

main






Run this script to rename the (HD & SD) videos or subtitles to their proper session titles

wwdc2021_rename_title.sh    Select all
#!/bin/sh # @Last Modified by: javacom # @Last Modified time: 2021-6-10 # For WWDC2021 video files, VIDEO=sd or VIDEO=hd WWDC_YEAR=2021; VIDEO=sd; curl -s https://developer.apple.com/videos/wwdc${WWDC_YEAR}/ | grep -B1 "video-title" | sed -e "s#[[:space:]]\{10,\}<a href=./videos/play/wwdc202./\([0-9]\{3,5\}\).*#\[ -f \"wwdc${WWDC_YEAR}-\1_${VIDEO}.mp4\" \] \&\& mv \"wwdc${WWDC_YEAR}-\1_${VIDEO}.mp4\" \"wwdc${WWDC_YEAR}-\1_${VIDEO}_#" -e "s/[\@:’\'\,?]//g" -e "s/\"\(The.*\)\"/\1/" -e 's#.*video-title..\(.*\)\(</h4>\)#\1.mp4"#' -e '/--/d' | sed '/^\[ -f/{N;s/\n//;}' | sed "s#[\/@:’\'\,]#_#g" | /bin/bash # For WWDC2021 English subtitle files, VIDEO=sd or VIDEO=hd WWDC_YEAR=2021; VIDEO=sd; curl -s https://developer.apple.com/videos/wwdc${WWDC_YEAR}/ | grep -B1 "video-title" | sed -e "s#[[:space:]]\{10,\}<a href=./videos/play/wwdc202./\([0-9]\{3,5\}\).*#\[ -f \"wwdc${WWDC_YEAR}-\1_${VIDEO}.en.srt\" \] \&\& mv \"wwdc${WWDC_YEAR}-\1_${VIDEO}.en.srt\" \"wwdc${WWDC_YEAR}-\1_${VIDEO}_#" -e "s/[\@:’\'\,?]//g" -e "s/\"\(The.*\)\"/\1/" -e 's#.*video-title..\(.*\)\(</h4>\)#\1.en.srt"#' -e '/--/d' | sed '/^\[ -f/{N;s/\n//;}' | sed "s#[\/@:’\'\,]#_#g" | /bin/bash






# Example nohup command for downloading on Linux

# "&&" instead of ";": the script is only started when the cd succeeded, so a
# missing directory never causes a download into the wrong place.
nohup bash -c 'cd "$HOME/Downloads/WWDC/" && ./wwdc2021_fetch_mp4.sh' &> nohup.wwdc2021.mp4.out &

nohup bash -c 'cd "$HOME/Downloads/WWDC/" && ./wwdc2021_fetch_en.srt.sh' &> nohup.wwdc2021.en.srt.out &

# or use screen utility to download
screen -S wwdcdownloadmp4
bash wwdc2021_fetch_mp4.sh



Thursday, April 8, 2021

How to write a subroutine call in macOS assembly code for x86_64 and arm64

(1) Demo of a subroutine call in macOS assembly code for x86_64 and arm64. On an M1 Mac with Rosetta 2 installed, both the x86_64 and the arm64 binaries can be compiled and run once Xcode is installed.
callfactorial.c  Select all
/*
 * An application that illustrates calling the factorial function defined
 * elsewhere (in assembly).
 */

#include <stdio.h>
#include <inttypes.h>

uint64_t factorial(unsigned n);

/*
 * Prints factorial(0) .. factorial(19), one result per line.
 * PRIu64 is used so the format always matches uint64_t, whatever integer
 * type it is defined as on the target.
 */
int main(void) {
    for (unsigned i = 0; i < 20; i++) {
        printf("factorial(%2u) = %" PRIu64 "\n", i, factorial(i));
    }
    return 0;
}


factorial.s  Select all
// ----------------------------------------------------------------------
// A 64-bit recursive implementation of the function
//
//     uint64_t factorial(unsigned n)
//
// implemented recursively with x86_64 and arm64 assembly code.
//
// n arrives in the first integer argument register (edi on x86_64, w0 on
// arm64). It is a 32-bit value, so the upper half of the 64-bit register is
// not guaranteed to be zero and is cleared before any 64-bit use.
// The result is returned in rax (x86_64) or x0 (arm64).
// -----------------------------------------------------------------------

        .globl  _factorial

        .text
#ifdef __arm64__
        .align  4
#endif
_factorial:
#ifdef __x86_64__
        mov     %edi, %edi              // zero-extend n into rdi
        cmp     $1, %rdi                // n <= 1?
        jnbe    L1                      // no: go do a recursive call
        mov     $1, %rax                // yes: return 1
        ret
#endif
#ifdef __arm64__
        mov     w0, w0                  // zero-extend n into x0
        cmp     x0, #1                  // n > 1?
        b.hi    L1                      // yes: go do a recursive call
        mov     x0, #1                  // no: return 1
        ret
#endif
L1:
#ifdef __x86_64__
        push    %rdi                    // save n on stack (also aligns rsp)
        dec     %rdi                    // n-1
        call    _factorial              // factorial(n-1), result goes in rax
        pop     %rdi                    // restore n
        imul    %rdi, %rax              // n * factorial(n-1), stored in rax
        ret
#endif
#ifdef __arm64__
        stp     x0, lr, [sp, #-16]!     // push n and LR (x30); LR is needed to return
        sub     x0, x0, #1              // n-1, passed on in x0
        bl      _factorial              // factorial(n-1), result goes in x0
        ldp     x8, lr, [sp], #16       // pop n into scratch register x8, restore LR
        mul     x0, x0, x8              // n * factorial(n-1), stored in x0
        ret
#endif


(2) To compile with -g and codesign the program so as to debug in lldb under macOS.
shell script  Select all
# To compile and codesign x86_64 version
# (the output name must match the binary that is signed afterwards)
clang factorial.s callfactorial.c -g -o callfactorial_x86_64 -arch x86_64 && codesign --entitlements entitlements --force -s - callfactorial_x86_64

# To compile and codesign arm64 version
clang factorial.s callfactorial.c -g -o callfactorial_arm64 -arch arm64 && codesign --entitlements entitlements --force -s - callfactorial_arm64


(3) To debug using lldb
shell script  Select all
lldb callfactorial_x86_64
# or
lldb callfactorial_arm64

# lldb debug session for arm64 - useful commands
(lldb) breakpoint set --name main --name factorial
(lldb) breakpoint list
(lldb) run
(lldb) step
(lldb) po i
(lldb) reg read x0 x8 lr pc
(lldb) reg read -f t cpsr

# lldb debug session for x86_64 - useful commands
(lldb) reg read -f d rax rdi rflags
(lldb) reg read -f t rflags

# print the address value in the stackpointer for x86_64
(lldb) p *(int **)$sp

# hint: to search lldb command history use ctrl-r


Saturday, April 3, 2021

How to use inline assembly language for M1 Mac for x86_64 and arm64

(1) Demo the inline assembly code for x86_64 and arm64
add-inline.c  Select all
#include <stdio.h>

// compile with command line
//   clang add-inline.c -o add-inline_x86_64 -arch x86_64
//   clang add-inline.c -o add-inline_arm64 -arch arm64
// disassemble with
//   otool -otV add-inline_x86_64
//   otool -otV add-inline_arm64

/*
 * Adds two integers with inline assembly and prints the result.
 * On architectures other than x86_64 and arm64 no assembly is emitted and
 * the printed answer stays 0.
 */
int main(int argc, const char * argv[]) {
    int a = 10;
    int b = 25;
    int ans = 0;

#ifdef __x86_64__
    /*
     * Only the output operand is written: input operands must not be
     * modified by the asm. "=&r" (early clobber) keeps %0 out of the
     * registers holding %1/%2, because %0 is written before %2 is read.
     * "cc" records that add changes the flags.
     */
    __asm__(
        "mov %1,%0;\n"   // %0 = %1
        "add %2,%0;\n"   // %0 += %2, source first, destination second
        : "=&r"(ans)
        : "r"(a), "r"(b)
        : "cc"
    );
#endif

#ifdef __arm64__
    __asm__(
        "add %w0,%w1,%w2;\n"   // %w0 = %w1 + %w2, destination first
        : "=r"(ans)
        : "r"(a), "r"(b)
        :
    );
#endif

    printf("The answer is %d\n",ans);
    return 0;
}


(2) Demo the assembly source code as a separate function for x86_64 and arm64
add-main.c  Select all
// compile with command line
//   clang add-main.c add.s -o add-main_x86_64 -arch x86_64
//   clang add-main.c add.s -o add-main_arm64 -arch arm64
#include <stdio.h>

/* Implemented in add.s. */
int add(int x, int y);

/* Calls the assembly add() once and prints its result. */
int main(int argc, const char * argv[]) {
    int ans = add(15,40);
    printf("The answer is %d\n",ans);
    return 0;
}


add.s  Select all
// int add(int x, int y)
//
// x86_64 calling convention
//   integer arguments: rdi, rsi, rdx, rcx, r8, r9
//   their 32-bit views: edi, esi, edx, ecx, r8d, r9d
//   their 16-bit views: di, si, dx, cx, r8w, r9w
//   the syscall number is placed in rax
//   see https://sigsegv.pl/osx-bsd-syscalls/
//   return value is in rax
//
// arm64 calling convention
//   integer arguments: x0 .. x7
//   their 32-bit views: w0 .. w7
//   return value is in x0
//   syscall numbers: see https://wiki.cdot.senecacollege.ca/wiki/Syscalls
//   (that page describes Linux, where the number goes in x8)

        .text
        .globl  _add
        .align  2
_add:
#if defined(__x86_64__)
        add     %esi, %edi      // edi += esi (AT&T syntax: source comes first)
        mov     %edi, %eax      // hand the sum back in eax
#elif defined(__arm64__)
        add     w0, w0, w1      // w0 = w0 + w1 (destination comes first)
#endif
        ret




entitlements  Select all
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<!-- Entitlements file passed to codesign in the steps of this post, so that
     the signed demo binaries can be debugged with lldb. -->
<plist version="1.0">
<dict>
    <key>com.apple.security.get-task-allow</key>
    <true/>
</dict>
</plist>


(3) Compile with
clang add-inline.c -o add-inline_x86_64 -arch x86_64
clang add-inline.c -o add-inline_arm64 -arch arm64
clang add-main.c add.s -o add-main_x86_64 -arch x86_64
clang add-main.c add.s -o add-main_arm64 -arch arm64


(4) Disassemble with (e.g.)
otool -otV add-inline_arm64

(5) Install Rosetta 2 on M1 to run the x86_64 version (e.g.)
./add-main_x86_64

(6) codesign and debug with lldb (e.g.)
clang -g -o add-inline_x86_64 add-inline.c -arch x86_64
codesign --entitlement entitlements --force -s - add-inline_x86_64
lldb add-inline_x86_64
(lldb) breakpoint set --file add-inline.c --line 7


(7) floating point example for x86_64 and arm64
compile with
clang sum.s callsum.c -o callsum_x86_64 -arch x86_64
clang sum.s callsum.c -o callsum_arm64 -arch arm64
Debug and codesign similar to above example
callsum.c  Select all
/*
 * callsum.c
 *
 * Shows how to call sum(), which is written in assembly language in sum.s.
 */

// clang sum.s callsum.c -o callsum_x86_64 -arch x86_64
// clang sum.s callsum.c -o callsum_arm64 -arch arm64

#include <stdio.h>

double sum(double[], unsigned);

/* Prints the sum of the first 6, 2, 0 and 3 sample values, in that order. */
int main() {
    double samples[] = {
        40.5,
        26.7,
        21.9,
        1.5,
        -40.5,
        -23.4
    };
    const unsigned lengths[] = { 6, 2, 0, 3 };
    const unsigned runs = sizeof lengths / sizeof lengths[0];

    for (unsigned k = 0; k < runs; k++) {
        printf("%20.7f\n", sum(samples, lengths[k]));
    }
    return 0;
}


sum.s  Select all
// -----------------------------------------------------------------------
// A 64-bit function that returns the sum of the elements in a
// floating-point array for x86_64 and arm64. The function has prototype:
//
//     double sum(double[] array, unsigned length)
//
// array is a pointer (rdi / x0). length is a 32-bit unsigned value, so it is
// only ever accessed through its 32-bit register (esi / w1); the upper half
// of rsi / x1 is not guaranteed to be zero.
// The result is returned in xmm0 (x86_64) or d0 (arm64).
// -----------------------------------------------------------------------

        .globl  _sum

        .text
        .align  2
_sum:
#ifdef __x86_64__
        xorpd   %xmm0, %xmm0            // initialize the sum to 0
                                        // floating-point values are returned in xmm0
        test    %esi, %esi              // special case for length = 0
        je      done
#endif
#ifdef __arm64__
        movi    d0, #0                  // initialize the sum to 0
                                        // doubles are returned in d0
        cbz     w1, done                // special case for length = 0
#endif
next:
#ifdef __x86_64__
        addsd   (%rdi), %xmm0           // add in the current array element
        add     $8, %rdi                // move to next array element
        dec     %esi                    // count down
        jnz     next                    // continue until the count reaches 0
#endif
#ifdef __arm64__
        ldr     d16, [x0]               // load the current element into scratch register d16
        fadd    d0, d0, d16             // add in the current array element
        add     x0, x0, #8              // move to next array element
        subs    w1, w1, #1              // count down
        b.ne    next                    // continue until the count reaches 0
#endif
done:
        ret