diff --git a/.jekyll-cache/Jekyll/Cache/Jekyll--Cache/b7/9606fb3afea5bd1609ed40b622142f1c98125abcfe89a76a661b0e8e343910 b/.jekyll-cache/Jekyll/Cache/Jekyll--Cache/b7/9606fb3afea5bd1609ed40b622142f1c98125abcfe89a76a661b0e8e343910
index af73222..32c22f1 100644
--- a/.jekyll-cache/Jekyll/Cache/Jekyll--Cache/b7/9606fb3afea5bd1609ed40b622142f1c98125abcfe89a76a661b0e8e343910
+++ b/.jekyll-cache/Jekyll/Cache/Jekyll--Cache/b7/9606fb3afea5bd1609ed40b622142f1c98125abcfe89a76a661b0e8e343910
@@ -1 +1 @@
-I"Ą{"source"=>"/Users/pengzhan/Github/codersherlock.github.io", "destination"=>"/Users/pengzhan/Github/codersherlock.github.io/_site", "collections_dir"=>"", "cache_dir"=>".jekyll-cache", "plugins_dir"=>"_plugins", "layouts_dir"=>"_layouts", "data_dir"=>"_data", "includes_dir"=>"_includes", "collections"=>{"posts"=>{"output"=>true, "permalink"=>"/posts/:title"}}, "safe"=>false, "include"=>[".htaccess"], "exclude"=>["CHANGELOG.md", "HOW_TO_RELEASE.md", "Gemfile", "Gemfile.lock", "LICENSE", "README-*.md", "README.md", "gulpfile.js", "jekyll-text-theme.gemspec", "package-lock.json", "package.json", "/docs", "/node_modules", "/screenshots", "/test", "/vendor", "configure.sh", ".sass-cache", ".jekyll-cache", "gemfiles", "node_modules", "vendor/bundle/", "vendor/cache/", "vendor/gems/", "vendor/ruby/"], "keep_files"=>[".git", ".svn"], "encoding"=>"utf-8", "markdown_ext"=>"markdown,mkdown,mkdn,mkd,md", "strict_front_matter"=>false, "show_drafts"=>nil, "limit_posts"=>0, "future"=>false, "unpublished"=>false, "whitelist"=>[], "plugins"=>["jekyll-feed", "jekyll-paginate", "jekyll-sitemap", "jemoji"], "markdown"=>"kramdown", "highlighter"=>"rouge", "lsi"=>false, "excerpt_separator"=>"", "incremental"=>false, "detach"=>false, "port"=>"4000", "host"=>"127.0.0.1", "baseurl"=>"", "show_dir_listing"=>false, "permalink"=>"/posts/:title", "paginate_path"=>"/page:num", "timezone"=>"America/New_York", "quiet"=>false, "verbose"=>false, "defaults"=>[{"scope"=>{"path"=>"", "type"=>"posts"}, "values"=>{"layout"=>"article", "sharing"=>true, "license"=>true, "aside"=>{"toc"=>true}, "show_edit_on_github"=>false, "show_subscribe"=>true, "pageview"=>true}}], "liquid"=>{"error_mode"=>"warn", "strict_filters"=>false, "strict_variables"=>false}, "kramdown"=>{"auto_ids"=>true, "toc_levels"=>[1, 2, 3, 4, 5, 6], "entity_output"=>"as_char", "smart_quotes"=>"lsquo,rsquo,ldquo,rdquo", "input"=>"GFM", "hard_wrap"=>false, "guess_lang"=>true, "footnote_nr"=>1, "show_warnings"=>false, "syntax_highlighter"=>"rouge", "syntax_highlighter_opts"=>{:default_lang=>"plaintext", :guess_lang=>true}, "coderay"=>{}}, "text_skin"=>"default", "highlight_theme"=>"default", "url"=>"http://localhost:4000", "title"=>"Stop Talking, Start Doing", "description"=>"My personal blog, with some boring research staff and some tricks I was fancy to. I'll try my best to make this blog fun and useful. Not just a place I complain about all happens in my Lab.\n", "lang"=>"en", "author"=>{"type"=>nil, "name"=>"Pengzhan Hao", "url"=>nil, "avatar"=>"/static/avatar.jpg", "bio"=>nil, "email"=>"haopengzhan@gmail.com", "facebook"=>nil, "twitter"=>nil, "weibo"=>nil, "googleplus"=>nil, "telegram"=>nil, "medium"=>nil, "zhihu"=>nil, "douban"=>nil, "linkedin"=>"pengzhanhao", "github"=>"codersherlock", "npm"=>nil}, "repository"=>"CoderSherlock/CoderSherlock.github.io", "repository_tree"=>"master", "paths"=>{"root"=>nil, "home"=>nil, "archive"=>nil, "rss"=>nil}, "license"=>nil, "toc"=>{"selectors"=>nil}, "mathjax"=>nil, "mathjax_autoNumber"=>nil, "mermaid"=>nil, "chart"=>nil, "paginate"=>8, "sources"=>nil, "sharing"=>{"provider"=>false, "addthis"=>{"id"=>nil}}, "comments"=>{"provider"=>"disqus", "disqus"=>{"shortname"=>"codersherlockblog"}, "gitalk"=>{"clientID"=>nil, "clientSecret"=>nil, "repository"=>nil, "owner"=>nil, "admin"=>nil}, "valine"=>{"app_id"=>nil, "app_key"=>nil, "placeholder"=>nil, "visitor"=>nil, "meta"=>nil}}, "pageview"=>{"provider"=>false, "leancloud"=>{"app_id"=>nil, "app_key"=>nil, "app_class"=>nil}}, "search"=>{"provider"=>"default", "google"=>{"custom_search_engine_id"=>nil}}, "analytics"=>{"provider"=>"google", "google"=>{"tracking_id"=>"UA-82637164-1", "anonymize_ip"=>false}}, "livereload_port"=>35729, "serving"=>true, "watch"=>true}:ET
\ No newline at end of file
+I"
{"source"=>"/Users/pengzhan/Github/codersherlock.github.io", "destination"=>"/Users/pengzhan/Github/codersherlock.github.io/_site", "collections_dir"=>"", "cache_dir"=>".jekyll-cache", "plugins_dir"=>"_plugins", "layouts_dir"=>"_layouts", "data_dir"=>"_data", "includes_dir"=>"_includes", "collections"=>{"posts"=>{"output"=>true, "permalink"=>"/posts/:title"}}, "safe"=>false, "include"=>[".htaccess"], "exclude"=>["CHANGELOG.md", "HOW_TO_RELEASE.md", "Gemfile", "Gemfile.lock", "LICENSE", "README-*.md", "README.md", "gulpfile.js", "jekyll-text-theme.gemspec", "package-lock.json", "package.json", "/docs", "/node_modules", "/screenshots", "/test", "/vendor", "configure.sh", ".sass-cache", ".jekyll-cache", "gemfiles", "node_modules", "vendor/bundle/", "vendor/cache/", "vendor/gems/", "vendor/ruby/"], "keep_files"=>[".git", ".svn"], "encoding"=>"utf-8", "markdown_ext"=>"markdown,mkdown,mkdn,mkd,md", "strict_front_matter"=>false, "show_drafts"=>nil, "limit_posts"=>0, "future"=>false, "unpublished"=>false, "whitelist"=>[], "plugins"=>["jekyll-feed", "jekyll-paginate", "jekyll-sitemap", "jemoji"], "markdown"=>"kramdown", "highlighter"=>"rouge", "lsi"=>false, "excerpt_separator"=>"", "incremental"=>false, "detach"=>false, "port"=>"4000", "host"=>"127.0.0.1", "baseurl"=>"", "show_dir_listing"=>false, "permalink"=>"/posts/:title", "paginate_path"=>"/page:num", "timezone"=>"America/New_York", "quiet"=>false, "verbose"=>false, "defaults"=>[{"scope"=>{"path"=>"", "type"=>"posts"}, "values"=>{"layout"=>"article", "sharing"=>true, "license"=>true, "aside"=>{"toc"=>true}, "show_edit_on_github"=>false, "show_subscribe"=>true, "pageview"=>true}}], "liquid"=>{"error_mode"=>"warn", "strict_filters"=>false, "strict_variables"=>false}, "kramdown"=>{"auto_ids"=>true, "toc_levels"=>[1, 2, 3, 4, 5, 6], "entity_output"=>"as_char", "smart_quotes"=>"lsquo,rsquo,ldquo,rdquo", "input"=>"GFM", "hard_wrap"=>false, "guess_lang"=>true, "footnote_nr"=>1, "show_warnings"=>false}, "text_skin"=>"default", "highlight_theme"=>"default", "url"=>"https://codersherlock.github.io", "title"=>"Stop Talking, Start Doing", "description"=>"My personal blog, with some boring research staff and some tricks I was fancy to. I'll try my best to make this blog fun and useful. Not just a place I complain about all happens in my Lab.\n", "lang"=>"en", "author"=>{"type"=>nil, "name"=>"Pengzhan Hao", "url"=>nil, "avatar"=>"/static/avatar.jpg", "bio"=>nil, "email"=>"haopengzhan@gmail.com", "facebook"=>nil, "twitter"=>nil, "weibo"=>nil, "googleplus"=>nil, "telegram"=>nil, "medium"=>nil, "zhihu"=>nil, "douban"=>nil, "linkedin"=>"pengzhanhao", "github"=>"codersherlock", "npm"=>nil}, "repository"=>"CoderSherlock/CoderSherlock.github.io", "repository_tree"=>"master", "paths"=>{"root"=>nil, "home"=>nil, "archive"=>nil, "rss"=>nil}, "license"=>nil, "toc"=>{"selectors"=>nil}, "mathjax"=>nil, "mathjax_autoNumber"=>nil, "mermaid"=>nil, "chart"=>nil, "paginate"=>8, "sources"=>nil, "sharing"=>{"provider"=>false, "addthis"=>{"id"=>nil}}, "comments"=>{"provider"=>"disqus", "disqus"=>{"shortname"=>"codersherlockblog"}, "gitalk"=>{"clientID"=>nil, "clientSecret"=>nil, "repository"=>nil, "owner"=>nil, "admin"=>nil}, "valine"=>{"app_id"=>nil, "app_key"=>nil, "placeholder"=>nil, "visitor"=>nil, "meta"=>nil}}, "pageview"=>{"provider"=>false, "leancloud"=>{"app_id"=>nil, "app_key"=>nil, "app_class"=>nil}}, "search"=>{"provider"=>"default", "google"=>{"custom_search_engine_id"=>nil}}, "analytics"=>{"provider"=>"google", "google"=>{"tracking_id"=>"UA-82637164-1", "anonymize_ip"=>false}}, "serving"=>false}:ET
\ No newline at end of file
diff --git a/_posts/2022-02-22-cs350-labs.md b/_posts/2022-02-22-cs350-labs.md
index 534a6e4..9339507 100644
--- a/_posts/2022-02-22-cs350-labs.md
+++ b/_posts/2022-02-22-cs350-labs.md
@@ -363,3 +363,8 @@ It’s time to switch from the running scheduler to the selected process. Wait f
2. How did the last running process stop running and give the CPU back to the scheduler?
+
+
+### Lab
+
+
diff --git a/_site/404.html b/_site/404.html
index dbc1a04..52b61f9 100644
--- a/_site/404.html
+++ b/_site/404.html
@@ -5,7 +5,7 @@
-
+
diff --git a/_site/about.html b/_site/about.html
index 5a09bd2..2578594 100644
--- a/_site/about.html
+++ b/_site/about.html
@@ -5,7 +5,7 @@
-
+
diff --git a/_site/archive.html b/_site/archive.html
index 0aacefd..6f7cdb7 100644
--- a/_site/archive.html
+++ b/_site/archive.html
@@ -5,7 +5,7 @@
-
+
diff --git a/_site/feed.xml b/_site/feed.xml
index 5130ce3..283d9cf 100644
--- a/_site/feed.xml
+++ b/_site/feed.xml
@@ -1,5 +1,5 @@
-Jekyll2022-04-02T21:59:47-04:00http://localhost:4000/feed.xmlStop Talking, Start DoingMy personal blog, with some boring research staff and some tricks I was fancy to. I'll try my best to make this blog fun and useful. Not just a place I complain about all happens in my Lab.
-Pengzhan Haohaopengzhan@gmail.comLabs of CS3502022-02-22T16:08:17-05:002022-02-22T16:08:17-05:00http://localhost:4000/posts/cs350-labs<p>This will be a series regarding lab I gave during the spring 2022 semester.</p>
+Jekyll2022-05-04T19:45:41-04:00https://codersherlock.github.io/feed.xmlStop Talking, Start DoingMy personal blog, with some boring research staff and some tricks I was fancy to. I'll try my best to make this blog fun and useful. Not just a place I complain about all happens in my Lab.
+Pengzhan Haohaopengzhan@gmail.comLabs of CS3502022-02-22T16:08:17-05:002022-02-22T16:08:17-05:00https://codersherlock.github.io/posts/cs350-labs<p>This will be a series regarding lab I gave during the spring 2022 semester.</p>
<p>The reason why I am writing this down is because it has been a week and no students ask for the solution of the last Lab.
I realise that learning gap between students are huge, especially when a non-profit university is admitting more and more students.
@@ -344,6 +344,8 @@ It’s time to switch from the running scheduler to the selected process. Wait f
<li>How did the last running process stop running and give the CPU back to the scheduler?</li>
</ol>
+<h3 id="lab">Lab</h3>
+
<div class="footnotes" role="doc-endnotes">
<ol>
<li id="fn:ldman" role="doc-endnote">
@@ -353,7 +355,7 @@ It’s time to switch from the running scheduler to the selected process. Wait f
<p><a href="https://sourceware.org/binutils/docs/binutils/objcopy.html">3 objcopy - binutils mannual</a>Â <a href="#fnref:objcopyman" class="reversefootnote" role="doc-backlink">↩</a></p>
</li>
</ol>
-</div>Pengzhan HaoThis will be a series regarding lab I gave during the spring 2022 semester. The reason why I am writing this down is because it has been a week and no students ask for the solution of the last Lab. I realise that learning gap between students are huge, especially when a non-profit university is admitting more and more students. To help all students in understanding concepts of modern OS, I decided to write this post. It starts with the past lab content I have (as the skelton), and will be amended with extra materials I think it helps. Remember, it’s for helping in learning. DON’T COPY & PASTE CODE! Index Lab1: Introduction of Makefile and Xv6. Lab3: System calls for process management. Lab4: Inter-processes communication. Lab6/7: CPU scheduling. Lab1-Introduction Lab3-Process Lab4-IPC Lab6-7-Scheduling First user process in xv6 Kernel works In xv6, as the same as conventional linux OS, the very first user-level process is init. Before init’s running, all the OS bootstraps happen in a highly privileged mode(kernel level). Xv6’s kernel has the entry point as the main function located in the file main.c. The main function invokes 17 functions to set up kernel page tables, interrupt handlers, I/O devices and etc. When all kernel preparations are done, by calling the function userinit(), kernel will boot up process init. int main(void) { kinit1(end, P2V(4*1024*1024)); // phys page allocator kvmalloc(); // kernel page table mpinit(); // collect info about this machine lapicinit(); seginit(); // set up segments cprintf("\ncpu%d: starting xv6\n\n", cpu->id); picinit(); // interrupt controller ioapicinit(); // another interrupt controller consoleinit(); // I/O devices & their interrupts uartinit(); // serial port pinit(); // process table tvinit(); // trap vectors binit(); // buffer cache fileinit(); // file table ideinit(); // disk if(!ismp) timerinit(); // uniprocessor timer startothers(); // start other processors kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // must come after startothers() userinit(); // first user process // Finish setting up this processor in mpmain. mpmain(); } It’s tricky since that init is a user process, but kernel can’t call any user-level system calls to create it. Why? 1. Kernel has all privileges to create a user process. So it doesn’t need to call system calls such as fork(). And 2. All other user processes can be created by forking from its parent. Forking including clone the whole user virtual memory layout. However, the first process has no parent to fork from. That’s why it makes the creation of the first user process becomes so unique. In proc.c, userinit() define there gives us the whole procedure of creating init. Similar to the fork(), but more simple. Process control block(structures for storing the process status) was created at the very first by calling allocproc(). After then, by invoking setupkvm()(defined in vm.c), kernel memory map was setup for the process. During setting up kernel memory map, a page size virtual memory will be assigned to the process as ready. And later, this page size memory will be used to store instructions of init. Followed by setup kernel stack for the init process, calling inituvm() will load init’s text into the page that is just being allocated. inituvm() takes 3 arguments: a pointer to the process’s page directory (p->pgdir), a char-type pointer declared from external which point to init’s text segment(_binary_initcode_start), and a char-type pointer which points to an external integer as the size of the init’s text segment(_binary_initcode_size). Simply put, it will load instructions of init into the memory. So now, the problem becomes when and where did instructions for init have compiled into the kernel? void userinit(void) { struct proc *p; extern char _binary_initcode_start[], _binary_initcode_size[]; p = allocproc(); initproc = p; if((p->pgdir = setupkvm()) == 0) panic("userinit: out of memory?"); inituvm(p->pgdir, _binary_initcode_start, (int)_binary_initcode_size); p->sz = PGSIZE; memset(p->tf, 0, sizeof(*p->tf)); p->tf->cs = (SEG_UCODE << 3) | DPL_USER; p->tf->ds = (SEG_UDATA << 3) | DPL_USER; p->tf->es = p->tf->ds; p->tf->ss = p->tf->ds; p->tf->eflags = FL_IF; p->tf->esp = PGSIZE; p->tf->eip = 0; // beginning of initcode.S safestrcpy(p->name, "initcode", sizeof(p->name)); p->cwd = namei("/"); p->state = RUNNABLE; } Where the user-level code was integrated? If you search the keyword “_binary_initcode_start” in the source code, you can’t find any references. The clue comes from the Makefile. In the makefile, initcode is a prerequisites to compile the kernel image. Step 1: Before kernel was compiled, initcode.S was first compiled to a runnable binary initcode. This binary was very odd because it was not supposed to let any other OS to run it. Initcode.s was first compiled without any standard including, and generating the intermediate file initcode.o. Step 2: Initcode.o then linked to Initcode.out with two uncommon settings. First it specify the entry of this binary file as when “start” symbol points to. This “start” symbol was declared in the assembly code. Second it specify a absolute address(0) for the text segments. By doing this, text segments will be placed at the start of the binary file (except the header of the ELF)1. Step 3: Initcode.out is already a minimized binary but it’s not enough. That’s why when using objcopy to copy it to the file initcode, it further strip all headers and debug information2. At this point, we have a minimal binary file initcode. From the first byte of this file, it’s only includes runnable instructions. And the size of the file is only 44 bytes. initcode: initcode.S $(CC) $(CFLAGS) -nostdinc -I. -c initcode.S # Step 1 $(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o # Step 2 $(OBJCOPY) -S -O binary initcode.out initcode # Step 3 $(OBJDUMP) -S initcode.o > initcode.asm This binary later were appended to the kernel using following commands. And during this appending, 3 symbols were generated and added to the symbol table of the kernel1. “_binary_initcode_start” contains the address of where the initcode segment was appended to. “_binary_initcode_end” contains the address of where the initcode segment was ended at. “_binary_initcode_size” is a *ABS* type symbol with value 0x2C(45) that specify the size of the initcode segment is 45 bytes. kernel: $(OBJS) entry.o entryother initcode kernel.ld $(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS) -b binary initcode entryother # <- This Line $(OBJDUMP) -S kernel > kernel.asm $(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym In short summary, using objdump, we can verify that source code initcode.S has been compiled and loaded into the kernel. Also the segment of initcode’s instructions was located by the pointer “_binary_initcode_start”. That’s explain when calling inituvm(p->pgdir, _binary_initcode_start, (int)_binary_initcode_size);, functionalities implemented in initcode.S will be loaded into the runtime of the first process within xv6. # Header of the file kernel kernel: file format elf32-i386 kernel architecture: i386, flags 0x00000112: EXEC_P, HAS_SYMS, D_PAGED start address 0x0010000c Program Header: LOAD off 0x00001000 vaddr 0x80100000 paddr 0x00100000 align 2**12 filesz 0x00008c6a memsz 0x00008c6a flags r-x ... Sections: Idx Name Size VMA LMA File off Algn 0 .text 00008586 80100000 00100000 00001000 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE ... SYMBOL TABLE: ... 8010b50c g .data 00000000 _binary_initcode_end ... 8010b4e0 g .data 00000000 _binary_initcode_start ... 0000002c g *ABS* 00000000 _binary_initcode_size ... User-level code Take a look of content in the initcode.S, you will find the code can explain itself well. There are no other jobs but just calling system call exec to run a user-level binary “init”. Initcode.S: # Initial process execs /init. #include "syscall.h" #include "traps.h" # exec(init, argv) .globl start start: pushl $argv pushl $init pushl $0 // where caller pc would be movl $SYS_exec, %eax int $T_SYSCALL # for(;;) exit(); exit: movl $SYS_exit, %eax int $T_SYSCALL jmp exit # char init[] = "/init\0"; init: .string "/init\0" # char *argv[] = { init, 0 }; .p2align 2 argv: .long init .long 0 The “init” mentioned above is not a pure user-level binary executable that compiled from the source code init.c. Within init.c, a file named console will be created at the runtime for saving standard outputs and errors. Then it will forked a child process(the second user process), and let it run program “sh”. “sh” is the xv6’s default shell, a user-level program that generated from source sh.c. After the shell boots up, you can interactive with the xv6. This’s how first process (and second process) was started in the xv6. init.c: // init: The initial user-level program #include "types.h" #include "stat.h" #include "user.h" #include "fcntl.h" char *argv[] = { "sh", 0 }; int main(void) { int pid, wpid; if(open("console", O_RDWR) < 0){ mknod("console", 1, 1); open("console", O_RDWR); } dup(0); // stdout dup(0); // stderr for(;;){ printf(1, "init: starting sh\n"); pid = fork(); if(pid < 0){ printf(1, "init: fork failed\n"); exit(); } if(pid == 0){ exec("sh", argv); printf(1, "init: exec sh failed\n"); exit(); } while((wpid=wait()) >= 0 && wpid != pid) printf(1, "zombie!\n"); } } Xv6’s round robin schduler The Scheduler is the core of an operating system. With the scheduling of processes, the kernel can achieve near-real-time execution of multiple workloads. The scheduling problem is also an active aspect of computer science research. You can’t have one algorithm to fit all scenarios. Xv6 by default has a round-robin scheduler. It’s controlled using two-level for-loops, where the top-level for-loop is an endless loop that will keep the scheduler busy running. The second-level nested for-loop will iterate a data structure named Ptable where all control information for processes is stored. Information including pid, process name, etc. is stored in a structure called proc. Ptable is an array of processes. Every runnable process in the Ptable will run strictly 1 time tick until the for-loop reached the last process in the Ptable. Then it will loop back to the top-level for-loop for the next iteration of processes. // In file proc.c struct { struct spinlock lock; struct proc proc[NPROC]; } ptable; // In file proc.h struct proc { uint sz; // Size of process memory (bytes) pde_t* pgdir; // Page table char *kstack; // Bottom of kernel stack for this process enum procstate state; // Process state int pid; // Process ID struct proc *parent; // Parent process struct trapframe *tf; // Trap frame for current syscall struct context *context; // swtch() here to run process void *chan; // If non-zero, sleeping on chan int killed; // If non-zero, have been killed struct file *ofile[NOFILE]; // Open files struct inode *cwd; // Current directory char name[16]; // Process name (debugging) }; // In file proc.c void scheduler(void) { struct proc *p; for(;;){ // Enable interrupts on this processor. sti(); // Loop over process table looking for process to run. acquire(&ptable.lock); for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ if(p->state != RUNNABLE) continue; // Switch to chosen process. It is the process's job // to release ptable.lock and then reacquire it // before jumping back to us. proc = p; switchuvm(p); p->state = RUNNING; swtch(&cpu->scheduler, proc->context); switchkvm(); // Process is done running for now. // It should have changed its p->state before coming back. proc = 0; } release(&ptable.lock); } } It’s not hard to understand why this logic makes a round-robin manner. This is very important to understand how to pick a process to run because scheduling is about always picking the appropriate process to achieve higher performance. You can always come up with some new ideas for designing a good scheduler policy. Understanding how to switch from one process to another is equivalently important. Once the process for the next time tick is selected. It’s time to switch from the running scheduler to the selected process. Wait for a second, there are two questions we haven’t answered. What is the running scheduler? How did the last running process stop running and give the CPU back to the scheduler? ld(1) - Linux man page ↩ ↩2 3 objcopy - binutils mannual ↩EDDL: How do we train neural networks on limited edge devices - PART 22021-10-31T13:01:14-04:002021-10-31T13:01:14-04:00http://localhost:4000/posts/eddl-how-do-we-train-on-limited-edge-devices-part2<p>In the last post, part1, our idea of distributed learning on edge environment was generally addressed.
+</div>Pengzhan HaoThis will be a series regarding lab I gave during the spring 2022 semester. The reason why I am writing this down is because it has been a week and no students ask for the solution of the last Lab. I realise that learning gap between students are huge, especially when a non-profit university is admitting more and more students. To help all students in understanding concepts of modern OS, I decided to write this post. It starts with the past lab content I have (as the skelton), and will be amended with extra materials I think it helps. Remember, it’s for helping in learning. DON’T COPY & PASTE CODE! Index Lab1: Introduction of Makefile and Xv6. Lab3: System calls for process management. Lab4: Inter-processes communication. Lab6/7: CPU scheduling. Lab1-Introduction Lab3-Process Lab4-IPC Lab6-7-Scheduling First user process in xv6 Kernel works In xv6, as the same as conventional linux OS, the very first user-level process is init. Before init’s running, all the OS bootstraps happen in a highly privileged mode(kernel level). Xv6’s kernel has the entry point as the main function located in the file main.c. The main function invokes 17 functions to set up kernel page tables, interrupt handlers, I/O devices and etc. When all kernel preparations are done, by calling the function userinit(), kernel will boot up process init. int main(void) { kinit1(end, P2V(4*1024*1024)); // phys page allocator kvmalloc(); // kernel page table mpinit(); // collect info about this machine lapicinit(); seginit(); // set up segments cprintf("\ncpu%d: starting xv6\n\n", cpu->id); picinit(); // interrupt controller ioapicinit(); // another interrupt controller consoleinit(); // I/O devices & their interrupts uartinit(); // serial port pinit(); // process table tvinit(); // trap vectors binit(); // buffer cache fileinit(); // file table ideinit(); // disk if(!ismp) timerinit(); // uniprocessor timer startothers(); // start other processors kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // must come after startothers() userinit(); // first user process // Finish setting up this processor in mpmain. mpmain(); } It’s tricky since that init is a user process, but kernel can’t call any user-level system calls to create it. Why? 1. Kernel has all privileges to create a user process. So it doesn’t need to call system calls such as fork(). And 2. All other user processes can be created by forking from its parent. Forking including clone the whole user virtual memory layout. However, the first process has no parent to fork from. That’s why it makes the creation of the first user process becomes so unique. In proc.c, userinit() define there gives us the whole procedure of creating init. Similar to the fork(), but more simple. Process control block(structures for storing the process status) was created at the very first by calling allocproc(). After then, by invoking setupkvm()(defined in vm.c), kernel memory map was setup for the process. During setting up kernel memory map, a page size virtual memory will be assigned to the process as ready. And later, this page size memory will be used to store instructions of init. Followed by setup kernel stack for the init process, calling inituvm() will load init’s text into the page that is just being allocated. inituvm() takes 3 arguments: a pointer to the process’s page directory (p->pgdir), a char-type pointer declared from external which point to init’s text segment(_binary_initcode_start), and a char-type pointer which points to an external integer as the size of the init’s text segment(_binary_initcode_size). Simply put, it will load instructions of init into the memory. So now, the problem becomes when and where did instructions for init have compiled into the kernel? void userinit(void) { struct proc *p; extern char _binary_initcode_start[], _binary_initcode_size[]; p = allocproc(); initproc = p; if((p->pgdir = setupkvm()) == 0) panic("userinit: out of memory?"); inituvm(p->pgdir, _binary_initcode_start, (int)_binary_initcode_size); p->sz = PGSIZE; memset(p->tf, 0, sizeof(*p->tf)); p->tf->cs = (SEG_UCODE << 3) | DPL_USER; p->tf->ds = (SEG_UDATA << 3) | DPL_USER; p->tf->es = p->tf->ds; p->tf->ss = p->tf->ds; p->tf->eflags = FL_IF; p->tf->esp = PGSIZE; p->tf->eip = 0; // beginning of initcode.S safestrcpy(p->name, "initcode", sizeof(p->name)); p->cwd = namei("/"); p->state = RUNNABLE; } Where the user-level code was integrated? If you search the keyword “_binary_initcode_start” in the source code, you can’t find any references. The clue comes from the Makefile. In the makefile, initcode is a prerequisites to compile the kernel image. Step 1: Before kernel was compiled, initcode.S was first compiled to a runnable binary initcode. This binary was very odd because it was not supposed to let any other OS to run it. Initcode.s was first compiled without any standard including, and generating the intermediate file initcode.o. Step 2: Initcode.o then linked to Initcode.out with two uncommon settings. First it specify the entry of this binary file as when “start” symbol points to. This “start” symbol was declared in the assembly code. Second it specify a absolute address(0) for the text segments. By doing this, text segments will be placed at the start of the binary file (except the header of the ELF)1. Step 3: Initcode.out is already a minimized binary but it’s not enough. That’s why when using objcopy to copy it to the file initcode, it further strip all headers and debug information2. At this point, we have a minimal binary file initcode. From the first byte of this file, it’s only includes runnable instructions. And the size of the file is only 44 bytes. initcode: initcode.S $(CC) $(CFLAGS) -nostdinc -I. -c initcode.S # Step 1 $(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o # Step 2 $(OBJCOPY) -S -O binary initcode.out initcode # Step 3 $(OBJDUMP) -S initcode.o > initcode.asm This binary later were appended to the kernel using following commands. And during this appending, 3 symbols were generated and added to the symbol table of the kernel1. “_binary_initcode_start” contains the address of where the initcode segment was appended to. “_binary_initcode_end” contains the address of where the initcode segment was ended at. “_binary_initcode_size” is a *ABS* type symbol with value 0x2C(45) that specify the size of the initcode segment is 45 bytes. kernel: $(OBJS) entry.o entryother initcode kernel.ld $(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS) -b binary initcode entryother # <- This Line $(OBJDUMP) -S kernel > kernel.asm $(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym In short summary, using objdump, we can verify that source code initcode.S has been compiled and loaded into the kernel. Also the segment of initcode’s instructions was located by the pointer “_binary_initcode_start”. That’s explain when calling inituvm(p->pgdir, _binary_initcode_start, (int)_binary_initcode_size);, functionalities implemented in initcode.S will be loaded into the runtime of the first process within xv6. # Header of the file kernel kernel: file format elf32-i386 kernel architecture: i386, flags 0x00000112: EXEC_P, HAS_SYMS, D_PAGED start address 0x0010000c Program Header: LOAD off 0x00001000 vaddr 0x80100000 paddr 0x00100000 align 2**12 filesz 0x00008c6a memsz 0x00008c6a flags r-x ... Sections: Idx Name Size VMA LMA File off Algn 0 .text 00008586 80100000 00100000 00001000 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE ... SYMBOL TABLE: ... 8010b50c g .data 00000000 _binary_initcode_end ... 8010b4e0 g .data 00000000 _binary_initcode_start ... 0000002c g *ABS* 00000000 _binary_initcode_size ... User-level code Take a look of content in the initcode.S, you will find the code can explain itself well. There are no other jobs but just calling system call exec to run a user-level binary “init”. Initcode.S: # Initial process execs /init. #include "syscall.h" #include "traps.h" # exec(init, argv) .globl start start: pushl $argv pushl $init pushl $0 // where caller pc would be movl $SYS_exec, %eax int $T_SYSCALL # for(;;) exit(); exit: movl $SYS_exit, %eax int $T_SYSCALL jmp exit # char init[] = "/init\0"; init: .string "/init\0" # char *argv[] = { init, 0 }; .p2align 2 argv: .long init .long 0 The “init” mentioned above is not a pure user-level binary executable that compiled from the source code init.c. Within init.c, a file named console will be created at the runtime for saving standard outputs and errors. Then it will forked a child process(the second user process), and let it run program “sh”. “sh” is the xv6’s default shell, a user-level program that generated from source sh.c. After the shell boots up, you can interactive with the xv6. This’s how first process (and second process) was started in the xv6. init.c: // init: The initial user-level program #include "types.h" #include "stat.h" #include "user.h" #include "fcntl.h" char *argv[] = { "sh", 0 }; int main(void) { int pid, wpid; if(open("console", O_RDWR) < 0){ mknod("console", 1, 1); open("console", O_RDWR); } dup(0); // stdout dup(0); // stderr for(;;){ printf(1, "init: starting sh\n"); pid = fork(); if(pid < 0){ printf(1, "init: fork failed\n"); exit(); } if(pid == 0){ exec("sh", argv); printf(1, "init: exec sh failed\n"); exit(); } while((wpid=wait()) >= 0 && wpid != pid) printf(1, "zombie!\n"); } } Xv6’s round robin schduler The Scheduler is the core of an operating system. With the scheduling of processes, the kernel can achieve near-real-time execution of multiple workloads. The scheduling problem is also an active aspect of computer science research. You can’t have one algorithm to fit all scenarios. Xv6 by default has a round-robin scheduler. It’s controlled using two-level for-loops, where the top-level for-loop is an endless loop that will keep the scheduler busy running. The second-level nested for-loop will iterate a data structure named Ptable where all control information for processes is stored. Information including pid, process name, etc. is stored in a structure called proc. Ptable is an array of processes. Every runnable process in the Ptable will run strictly 1 time tick until the for-loop reached the last process in the Ptable. Then it will loop back to the top-level for-loop for the next iteration of processes. // In file proc.c struct { struct spinlock lock; struct proc proc[NPROC]; } ptable; // In file proc.h struct proc { uint sz; // Size of process memory (bytes) pde_t* pgdir; // Page table char *kstack; // Bottom of kernel stack for this process enum procstate state; // Process state int pid; // Process ID struct proc *parent; // Parent process struct trapframe *tf; // Trap frame for current syscall struct context *context; // swtch() here to run process void *chan; // If non-zero, sleeping on chan int killed; // If non-zero, have been killed struct file *ofile[NOFILE]; // Open files struct inode *cwd; // Current directory char name[16]; // Process name (debugging) }; // In file proc.c void scheduler(void) { struct proc *p; for(;;){ // Enable interrupts on this processor. sti(); // Loop over process table looking for process to run. acquire(&ptable.lock); for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ if(p->state != RUNNABLE) continue; // Switch to chosen process. It is the process's job // to release ptable.lock and then reacquire it // before jumping back to us. proc = p; switchuvm(p); p->state = RUNNING; swtch(&cpu->scheduler, proc->context); switchkvm(); // Process is done running for now. // It should have changed its p->state before coming back. proc = 0; } release(&ptable.lock); } } It’s not hard to understand why this logic makes a round-robin manner. This is very important to understand how to pick a process to run because scheduling is about always picking the appropriate process to achieve higher performance. You can always come up with some new ideas for designing a good scheduler policy. Understanding how to switch from one process to another is equivalently important. Once the process for the next time tick is selected. It’s time to switch from the running scheduler to the selected process. Wait for a second, there are two questions we haven’t answered. What is the running scheduler? How did the last running process stop running and give the CPU back to the scheduler? Lab ld(1) - Linux man page ↩ ↩2 3 objcopy - binutils mannual ↩EDDL: How do we train neural networks on limited edge devices - PART 22021-10-31T13:01:14-04:002021-10-31T13:01:14-04:00https://codersherlock.github.io/posts/eddl-how-do-we-train-on-limited-edge-devices-part2<p>In the last post, part1, our idea of distributed learning on edge environment was generally addressed.
I introduced the reason why edge distributed learning is needed and what improvements it can achieve.
In this post, I will talk about our motivation study and how our framework works.</p>
@@ -457,7 +459,7 @@ The other platform is the <a href="https://www.raspberrypi.com/products/
<p>The operating system running on the above platforms is Ubuntu 18.04 with Linux kernel 4.14.
We used <a href="http://dlib.net/">Dlib</a>, a C++ library that provides implementations for a wide range of machine learning algorithms.
-We chose the Dlib library because it is written in C/C++, and can be easily and natively used in embedded devices.</p>Pengzhan HaoIn the last post, part1, our idea of distributed learning on edge environment was generally addressed. I introduced the reason why edge distributed learning is needed and what improvements it can achieve. In this post, I will talk about our motivation study and how our framework works. How does data support us training on edge? Before designing and implementing our framework, we first need confirmation that training on edge resource-limited devices is worthwhile. We were using a malware detection neural network to show why a small, customized neural network is better. We collected 32000+ mobile apps feature as global data. With these data records, we trained a multilayer perceptron called “PerNet” to determine whether a given feature belongs to a benign or malware app. We called this detection. As well, PerNet can also classify malware apps into different types of attacks. We called this classification. The global model can achieve 93% above recall rate and 96.93% above accuracy. With all these data, we selected two community app usage sub-dataset for local model generations. Large categories (Scenario 1) We chose the 5 largest categories of apps, including entertainment, tools, brain&Puzzle, Lifestyle, and Education, as well as the 5 largest malware categories. All together, 12000+ apps were included in this sub-dataset, almost 50 to 50 between benign and malware. Campus-community categories (Scenario 2) We chose the 5 most downloaded categories from college students as benign groups, as well as a similar amount of 5 malware categories. To ensure that malware apps are included in 5 benign categories, we also considered synthesizing some other malware apps within categories of 5 most downloaded(benign) categories. With these two types of sub-dataset, we used the same PerNet to generate multiple local models. Under each scenarios experiment, we compared global and local models on the preserved test dataset. In all classification performances, local beat global in every scenario. In detection performances, local also share the same accuracy as global does. In summary, local models were trained on special occasions. Under the same circumstance, a global model can achieve no better accuracy than local models. The reason why local is better might be because of overfitting. I believe this issue also be considered in the machine learning communities that they brought transfer learning, a technique to optimize global models to special scenarios but performing more training to a global model once it’s shipped to local. Design and Implementation Overall design The basic EDDL distributed training setup consists of 3 parts. EDDL training cluster, a device cluster that consists of edge or mobile devices that are participating in training. EDDL manager, the initial driver program that works as collect training data, relay data to training devices and initial training clusters. Training data entry (TDE), a data storage for all training data. Dynamic training data distribution Existing distributed DNN training solutions usually statically partition training data among workers. It can be a problem when the training node joins and exits. We designed our framework that can dynamically distribute training data during learning. Before every training batch started, a batch of TDE will be sent to devices. In our experiments, we found that by applying this design, overall training time was shortened by doing. Especially in large amount devices cases, this optimization can be 50% less than statically divided. Scaling up cluster size Our framework was designed to have both sync and async parameter aggregation. Asynchronous aggregation can allow a high outcome of training batch but with a sacrifice or converge time. Synchronous aggregation allows a quick converge time in epochs, however can’t ensure performance when there’s a struggler worker. As showed in experiments, we chose sync as default because the converging time is dominant in overall training time. But, we also considered the possibilities of that async with more workers can achieve similar overall training time. We introduced a formula to determine whether adding more training nodes can help or not. Here we used bandwidth usage coefficient (BUC) as \[BUC = \dfrac{n}{T_{sync}}\] In this formula, \(n\) is the number of devices, and \(T_{sync}\) is the transmission time of parameters. With an increasing number of workers, n increase linearly but transmission time does not. When \(BUC\) increases, the cluster can speed up training time by adding workers. Otherwise, adding more workers won’t help with overall training time. Adaptive leader role splitting The idea of role splitting is simple that a device can work as a worker as well leader. The advantage of doing this is straightforward that we can transfer 1 less parameter and training time will be shortened. However, in our current settings, it can’t perform much better help since only 1 leader role is in a cluster. We can benefit from this in our future works. Overall architecture Details were given in the image. Prototype hardware and software EDDL was designed to be run on two single-board computer embedded platforms. One such platform is ODROID-XU4, which is equipped with a 2.1/1.4 GHz 32-bit ARM processor and 2GB memory. The other platform is the Raspberry Pi 3 Model B board, which comes with an ARM 1.2 GHz 64-bit quad-core processor and 1GB memory. The operating system running on the above platforms is Ubuntu 18.04 with Linux kernel 4.14. We used Dlib, a C++ library that provides implementations for a wide range of machine learning algorithms. We chose the Dlib library because it is written in C/C++, and can be easily and natively used in embedded devices.EDDL: How do we train neural networks on limited edge devices - PART 12021-10-13T16:53:20-04:002021-10-13T16:53:20-04:00http://localhost:4000/posts/eddl-how-do-we-train-on-limited-edge-devices<p>This post introduces our previous milestone in project “Edge trainer”, as the paper “EDDL: A Distributed Deep Learning System for Resource-limited Edge Computing Environment.” was published.
+We chose the Dlib library because it is written in C/C++, and can be easily and natively used in embedded devices.</p>Pengzhan HaoIn the last post, part1, our idea of distributed learning on edge environment was generally addressed. I introduced the reason why edge distributed learning is needed and what improvements it can achieve. In this post, I will talk about our motivation study and how our framework works. How does data support us training on edge? Before designing and implementing our framework, we first need confirmation that training on edge resource-limited devices is worthwhile. We were using a malware detection neural network to show why a small, customized neural network is better. We collected 32000+ mobile apps feature as global data. With these data records, we trained a multilayer perceptron called “PerNet” to determine whether a given feature belongs to a benign or malware app. We called this detection. As well, PerNet can also classify malware apps into different types of attacks. We called this classification. The global model can achieve 93% above recall rate and 96.93% above accuracy. With all these data, we selected two community app usage sub-dataset for local model generations. Large categories (Scenario 1) We chose the 5 largest categories of apps, including entertainment, tools, brain&Puzzle, Lifestyle, and Education, as well as the 5 largest malware categories. All together, 12000+ apps were included in this sub-dataset, almost 50 to 50 between benign and malware. Campus-community categories (Scenario 2) We chose the 5 most downloaded categories from college students as benign groups, as well as a similar amount of 5 malware categories. To ensure that malware apps are included in 5 benign categories, we also considered synthesizing some other malware apps within categories of 5 most downloaded(benign) categories. With these two types of sub-dataset, we used the same PerNet to generate multiple local models. Under each scenarios experiment, we compared global and local models on the preserved test dataset. In all classification performances, local beat global in every scenario. In detection performances, local also share the same accuracy as global does. In summary, local models were trained on special occasions. Under the same circumstance, a global model can achieve no better accuracy than local models. The reason why local is better might be because of overfitting. I believe this issue also be considered in the machine learning communities that they brought transfer learning, a technique to optimize global models to special scenarios but performing more training to a global model once it’s shipped to local. Design and Implementation Overall design The basic EDDL distributed training setup consists of 3 parts. EDDL training cluster, a device cluster that consists of edge or mobile devices that are participating in training. EDDL manager, the initial driver program that works as collect training data, relay data to training devices and initial training clusters. Training data entry (TDE), a data storage for all training data. Dynamic training data distribution Existing distributed DNN training solutions usually statically partition training data among workers. It can be a problem when the training node joins and exits. We designed our framework that can dynamically distribute training data during learning. Before every training batch started, a batch of TDE will be sent to devices. In our experiments, we found that by applying this design, overall training time was shortened by doing. Especially in large amount devices cases, this optimization can be 50% less than statically divided. Scaling up cluster size Our framework was designed to have both sync and async parameter aggregation. Asynchronous aggregation can allow a high outcome of training batch but with a sacrifice or converge time. Synchronous aggregation allows a quick converge time in epochs, however can’t ensure performance when there’s a struggler worker. As showed in experiments, we chose sync as default because the converging time is dominant in overall training time. But, we also considered the possibilities of that async with more workers can achieve similar overall training time. We introduced a formula to determine whether adding more training nodes can help or not. Here we used bandwidth usage coefficient (BUC) as \[BUC = \dfrac{n}{T_{sync}}\] In this formula, \(n\) is the number of devices, and \(T_{sync}\) is the transmission time of parameters. With an increasing number of workers, n increase linearly but transmission time does not. When \(BUC\) increases, the cluster can speed up training time by adding workers. Otherwise, adding more workers won’t help with overall training time. Adaptive leader role splitting The idea of role splitting is simple that a device can work as a worker as well leader. The advantage of doing this is straightforward that we can transfer 1 less parameter and training time will be shortened. However, in our current settings, it can’t perform much better help since only 1 leader role is in a cluster. We can benefit from this in our future works. Overall architecture Details were given in the image. Prototype hardware and software EDDL was designed to be run on two single-board computer embedded platforms. One such platform is ODROID-XU4, which is equipped with a 2.1/1.4 GHz 32-bit ARM processor and 2GB memory. The other platform is the Raspberry Pi 3 Model B board, which comes with an ARM 1.2 GHz 64-bit quad-core processor and 1GB memory. The operating system running on the above platforms is Ubuntu 18.04 with Linux kernel 4.14. We used Dlib, a C++ library that provides implementations for a wide range of machine learning algorithms. We chose the Dlib library because it is written in C/C++, and can be easily and natively used in embedded devices.EDDL: How do we train neural networks on limited edge devices - PART 12021-10-13T16:53:20-04:002021-10-13T16:53:20-04:00https://codersherlock.github.io/posts/eddl-how-do-we-train-on-limited-edge-devices<p>This post introduces our previous milestone in project “Edge trainer”, as the paper “EDDL: A Distributed Deep Learning System for Resource-limited Edge Computing Environment.” was published.
As the first part of the introductions, I focus only on the motivation and summary of our works.
More details in design and implementation can be found in late posts.</p>
@@ -534,7 +536,7 @@ Smartly schedule work balance and handle join/exit issues also need under consid
Devices with sufficient bandwidth can also work as virtual leader devices.
This approach helps minimize physical devices we used and more leaders can further scale up workers’ limits.</p>
</li>
-</ul>Pengzhan HaoThis post introduces our previous milestone in project “Edge trainer”, as the paper “EDDL: A Distributed Deep Learning System for Resource-limited Edge Computing Environment.” was published. As the first part of the introductions, I focus only on the motivation and summary of our works. More details in design and implementation can be found in late posts.Generate Word Cloud Figures with Chinese-Tokenization and WordCloud python libraries2020-09-15T22:00:14-04:002020-09-15T22:00:14-04:00http://localhost:4000/posts/generate-word-cloud-with-chinese-fenci<p>Let’s generate a word cloud like this.
+</ul>Pengzhan HaoThis post introduces our previous milestone in project “Edge trainer”, as the paper “EDDL: A Distributed Deep Learning System for Resource-limited Edge Computing Environment.” was published. As the first part of the introductions, I focus only on the motivation and summary of our works. More details in design and implementation can be found in late posts.Generate Word Cloud Figures with Chinese-Tokenization and WordCloud python libraries2020-09-15T22:00:14-04:002020-09-15T22:00:14-04:00https://codersherlock.github.io/posts/generate-word-cloud-with-chinese-fenci<p>Let’s generate a word cloud like this.
Don’t understand the language is not a big deal.
If your written language is based on latin alphabet(or other language has space between words), skip tokenization.</p>
@@ -693,7 +695,7 @@ If your written language is based on latin alphabet(or other language has space
<p><img src="/static/2020-09/2020-06-28.png" height="150" /></p>
-<p>This generated word cloud figure reflects the most popular economy news’ keyword in the week started 06-28-2020. Two largest words in the figure are â€ść–°ĺ† â€ť and â€ść–°ĺ† ç—…ćŻ’â€ť, both means “Covid-19” (This figure was in the week of the second covid spur in Beijing, China). The size of the image fits my phone screen and I can use an app to automatic sync it to my phone’s wallpaper. However, in this image, too many location nouns are presented. This will be something I can make progress on in the future.</p>Pengzhan HaoLet’s generate a word cloud like this. Don’t understand the language is not a big deal. If your written language is based on latin alphabet(or other language has space between words), skip tokenization.Xv6 introduction2017-07-28T14:56:55-04:002017-07-28T14:56:55-04:00http://localhost:4000/posts/intro-xv6<p>In this post, you will learn a few basic concepts of xv6. Learning path will be closed coupled to first project assignment I gave when I assisted in teaching OS classes.
+<p>This generated word cloud figure reflects the most popular economy news’ keyword in the week started 06-28-2020. Two largest words in the figure are â€ść–°ĺ† â€ť and â€ść–°ĺ† ç—…ćŻ’â€ť, both means “Covid-19” (This figure was in the week of the second covid spur in Beijing, China). The size of the image fits my phone screen and I can use an app to automatic sync it to my phone’s wallpaper. However, in this image, too many location nouns are presented. This will be something I can make progress on in the future.</p>Pengzhan HaoLet’s generate a word cloud like this. Don’t understand the language is not a big deal. If your written language is based on latin alphabet(or other language has space between words), skip tokenization.Xv6 introduction2017-07-28T14:56:55-04:002017-07-28T14:56:55-04:00https://codersherlock.github.io/posts/intro-xv6<p>In this post, you will learn a few basic concepts of xv6. Learning path will be closed coupled to first project assignment I gave when I assisted in teaching OS classes.
Understand system call and know how to implement a simple one will be coved as the first half.
In the second half of this post, I will discuss a little bit more on how to debug xv6 using gdb.<br />
<!--more--></p>
@@ -768,7 +770,7 @@ Using ssh may connect to different physical devices under same domain name, this
<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code>target remote localhost:28467
<span class="c"># target remote [ip-addr]:28467</span>
-</code></pre></div></div>Pengzhan HaoIn this post, you will learn a few basic concepts of xv6. Learning path will be closed coupled to first project assignment I gave when I assisted in teaching OS classes. Understand system call and know how to implement a simple one will be coved as the first half. In the second half of this post, I will discuss a little bit more on how to debug xv6 using gdb.Some of my previews experiment works: 20162016-10-28T12:27:33-04:002016-10-28T12:27:33-04:00http://localhost:4000/posts/some-of-my-previews-exper-work<p>This blog contains only some basic record of my works. For some details, I will write a unique blog just for some specific topics.
+</code></pre></div></div>Pengzhan HaoIn this post, you will learn a few basic concepts of xv6. Learning path will be closed coupled to first project assignment I gave when I assisted in teaching OS classes. Understand system call and know how to implement a simple one will be coved as the first half. In the second half of this post, I will discuss a little bit more on how to debug xv6 using gdb.Some of my previews experiment works: 20162016-10-28T12:27:33-04:002016-10-28T12:27:33-04:00https://codersherlock.github.io/posts/some-of-my-previews-exper-work<p>This blog contains only some basic record of my works. For some details, I will write a unique blog just for some specific topics.
<!--more--></p>
<h1 id="2016-10">2016-10</h1>
@@ -878,7 +880,7 @@ unxz ubuntu-14.04lts-xubuntu-odroid-xu-20140714.img.xz
<span class="c"># dump file</span>
su
<span class="nb">dd </span><span class="k">if</span><span class="o">=</span>/dev/block/mmcblk0p37 <span class="nv">of</span><span class="o">=</span>/sdcard/boot.img
-</code></pre></div></div>Pengzhan HaoThis blog contains only some basic record of my works. For some details, I will write a unique blog just for some specific topics.Using charles proxy to monitor mobile SSL traffics2016-10-27T22:50:33-04:002016-10-27T22:50:33-04:00http://localhost:4000/posts/charles-is-not-a-good-tool<p>In this blog, I will generally talk about how to use proper tools to monitor SSL traffics of a mobile devices. Currently, I only can dealing with those SSL traffics which use an obviously certification. Some applications may not using system root cert or they doesn’t provide us a method to modify their own certs. For these situation, I still didn’t find a good solutions for it. But I’ll keep updating this if I get one.<br />
+</code></pre></div></div>Pengzhan HaoThis blog contains only some basic record of my works. For some details, I will write a unique blog just for some specific topics.Using charles proxy to monitor mobile SSL traffics2016-10-27T22:50:33-04:002016-10-27T22:50:33-04:00https://codersherlock.github.io/posts/charles-is-not-a-good-tool<p>In this blog, I will generally talk about how to use proper tools to monitor SSL traffics of a mobile devices. Currently, I only can dealing with those SSL traffics which use an obviously certification. Some applications may not using system root cert or they doesn’t provide us a method to modify their own certs. For these situation, I still didn’t find a good solutions for it. But I’ll keep updating this if I get one.<br />
My current solution is using AP to forward all SSL traffic to a proxy, <a href="https://www.charlesproxy.com/">charles proxy</a> is my first choice (Prof asked). It’s a non-free software which still update new versions now. So mainly, I’ll talk about how to charles SSL proxy.
<!--more--></p>
@@ -907,4 +909,4 @@ You also need to save charles Root Certificate, it also contains in the same men
<ul>
<li>Set Proxy and SSL Proxy</li>
-</ul>Pengzhan HaoIn this blog, I will generally talk about how to use proper tools to monitor SSL traffics of a mobile devices. Currently, I only can dealing with those SSL traffics which use an obviously certification. Some applications may not using system root cert or they doesn’t provide us a method to modify their own certs. For these situation, I still didn’t find a good solutions for it. But I’ll keep updating this if I get one. My current solution is using AP to forward all SSL traffic to a proxy, charles proxy is my first choice (Prof asked). It’s a non-free software which still update new versions now. So mainly, I’ll talk about how to charles SSL proxy.Stop Talking is the worst title of one blog2016-10-26T22:50:33-04:002016-10-26T22:50:33-04:00http://localhost:4000/posts/welcome-to-my-blogPengzhan Haohaopengzhan@gmail.com
\ No newline at end of file
+</ul>Pengzhan HaoIn this blog, I will generally talk about how to use proper tools to monitor SSL traffics of a mobile devices. Currently, I only can dealing with those SSL traffics which use an obviously certification. Some applications may not using system root cert or they doesn’t provide us a method to modify their own certs. For these situation, I still didn’t find a good solutions for it. But I’ll keep updating this if I get one. My current solution is using AP to forward all SSL traffic to a proxy, charles proxy is my first choice (Prof asked). It’s a non-free software which still update new versions now. So mainly, I’ll talk about how to charles SSL proxy.Stop Talking is the worst title of one blog2016-10-26T22:50:33-04:002016-10-26T22:50:33-04:00https://codersherlock.github.io/posts/welcome-to-my-blogPengzhan Haohaopengzhan@gmail.com
\ No newline at end of file
diff --git a/_site/index.html b/_site/index.html
index f9de27f..a1e18a5 100644
--- a/_site/index.html
+++ b/_site/index.html
@@ -5,7 +5,7 @@
-
+
diff --git a/_site/posts/charles-is-not-a-good-tool.html b/_site/posts/charles-is-not-a-good-tool.html
index 3b34ea2..cac6e64 100644
--- a/_site/posts/charles-is-not-a-good-tool.html
+++ b/_site/posts/charles-is-not-a-good-tool.html
@@ -4,7 +4,7 @@
Using charles proxy to monitor mobile SSL traffics - Stop Talking, Start Doing
-
+
diff --git a/_site/posts/eddl-how-do-we-train-on-limited-edge-devices-part2.html b/_site/posts/eddl-how-do-we-train-on-limited-edge-devices-part2.html
index 7a66aba..1dcbdbf 100644
--- a/_site/posts/eddl-how-do-we-train-on-limited-edge-devices-part2.html
+++ b/_site/posts/eddl-how-do-we-train-on-limited-edge-devices-part2.html
@@ -4,7 +4,7 @@
EDDL: How do we train neural networks on limited edge devices - PART 2 - Stop Talking, Start Doing
-
+
diff --git a/_site/posts/eddl-how-do-we-train-on-limited-edge-devices.html b/_site/posts/eddl-how-do-we-train-on-limited-edge-devices.html
index e8746cb..06482b7 100644
--- a/_site/posts/eddl-how-do-we-train-on-limited-edge-devices.html
+++ b/_site/posts/eddl-how-do-we-train-on-limited-edge-devices.html
@@ -4,7 +4,7 @@
EDDL: How do we train neural networks on limited edge devices - PART 1 - Stop Talking, Start Doing
-
+
diff --git a/_site/posts/generate-word-cloud-with-chinese-fenci.html b/_site/posts/generate-word-cloud-with-chinese-fenci.html
index 27a9e98..bc05751 100644
--- a/_site/posts/generate-word-cloud-with-chinese-fenci.html
+++ b/_site/posts/generate-word-cloud-with-chinese-fenci.html
@@ -4,7 +4,7 @@
Generate Word Cloud Figures with Chinese-Tokenization and WordCloud python libraries - Stop Talking, Start Doing
-
+
diff --git a/_site/posts/intro-xv6.html b/_site/posts/intro-xv6.html
index 3542f80..14f92c1 100644
--- a/_site/posts/intro-xv6.html
+++ b/_site/posts/intro-xv6.html
@@ -4,7 +4,7 @@
Xv6 introduction - Stop Talking, Start Doing
-
+
diff --git a/_site/posts/some-of-my-previews-exper-work.html b/_site/posts/some-of-my-previews-exper-work.html
index c4a6405..fc81df0 100644
--- a/_site/posts/some-of-my-previews-exper-work.html
+++ b/_site/posts/some-of-my-previews-exper-work.html
@@ -4,7 +4,7 @@
Some of my previews experiment works: 2016 - Stop Talking, Start Doing
-
+
diff --git a/_site/posts/welcome-to-my-blog.html b/_site/posts/welcome-to-my-blog.html
index eabe0e4..b3db30a 100644
--- a/_site/posts/welcome-to-my-blog.html
+++ b/_site/posts/welcome-to-my-blog.html
@@ -4,7 +4,7 @@
Stop Talking is the worst title of one blog - Stop Talking, Start Doing
-
+
diff --git a/_site/robots.txt b/_site/robots.txt
index d297064..095b92f 100644
--- a/_site/robots.txt
+++ b/_site/robots.txt
@@ -1 +1 @@
-Sitemap: http://localhost:4000/sitemap.xml
+Sitemap: https://codersherlock.github.io/sitemap.xml
diff --git a/_site/sitemap.xml b/_site/sitemap.xml
index 04d34b1..d8b86cf 100644
--- a/_site/sitemap.xml
+++ b/_site/sitemap.xml
@@ -1,68 +1,68 @@
-http://localhost:4000/posts/welcome-to-my-blog
+https://codersherlock.github.io/posts/welcome-to-my-blog2016-10-26T22:50:33-04:00
-http://localhost:4000/posts/charles-is-not-a-good-tool
+https://codersherlock.github.io/posts/charles-is-not-a-good-tool2016-10-27T22:50:33-04:00
-http://localhost:4000/posts/some-of-my-previews-exper-work
+https://codersherlock.github.io/posts/some-of-my-previews-exper-work2016-10-28T12:27:33-04:00
-http://localhost:4000/posts/intro-xv6
+https://codersherlock.github.io/posts/intro-xv62017-07-28T14:56:55-04:00
-http://localhost:4000/posts/generate-word-cloud-with-chinese-fenci
+https://codersherlock.github.io/posts/generate-word-cloud-with-chinese-fenci2020-09-15T22:00:14-04:00
-http://localhost:4000/posts/eddl-how-do-we-train-on-limited-edge-devices
+https://codersherlock.github.io/posts/eddl-how-do-we-train-on-limited-edge-devices2021-10-13T16:53:20-04:00
-http://localhost:4000/posts/eddl-how-do-we-train-on-limited-edge-devices-part2
+https://codersherlock.github.io/posts/eddl-how-do-we-train-on-limited-edge-devices-part22021-10-31T13:01:14-04:00
-http://localhost:4000/posts/cs350-labs
+https://codersherlock.github.io/posts/cs350-labs2022-02-22T16:08:17-05:00
-http://localhost:4000/about.html
+https://codersherlock.github.io/about.html
-http://localhost:4000/archive
+https://codersherlock.github.io/archive
-http://localhost:4000/
+https://codersherlock.github.io/
-http://localhost:4000/static/2017-07/eps_poster.pdf
+https://codersherlock.github.io/static/2017-07/eps_poster.pdf2020-09-15T16:58:36-04:00
-http://localhost:4000/static/2017-07/p163-hao.pdf
+https://codersherlock.github.io/static/2017-07/p163-hao.pdf2020-09-15T16:58:36-04:00
-http://localhost:4000/static/2017-07/p169-zhang.pdf
+https://codersherlock.github.io/static/2017-07/p169-zhang.pdf2020-09-15T16:58:36-04:00
-http://localhost:4000/static/2018-02/a7-hao.pdf
+https://codersherlock.github.io/static/2018-02/a7-hao.pdf2020-09-15T16:58:36-04:00
-http://localhost:4000/static/2021-10/eta-infocom18.pdf
+https://codersherlock.github.io/static/2021-10/eta-infocom18.pdf2021-11-02T14:19:44-04:00
-http://localhost:4000/static/2021-12/eddl-sec21.pdf
+https://codersherlock.github.io/static/2021-12/eddl-sec21.pdf2021-12-14T19:38:35-05:00