I want a simple service I can deploy that lets me store blobs. I want it to return the hash of the stored object which I will use to load it again.
First we will write a simple Go service that does everything in memory, and then we will build an nginx config that has the webserver stream uploads to disk, so we don't use a lot of memory.
Simple go service
This is a simple http server that:
- Creates a `blobs` directory.
- Serves `/get` requests out of that directory.
- Receives a file parameter named `file` on `/put` and stores it under its md5 hash. It returns that hash.
- Or it gets a raw body on `/put` and just stores it.
Server code
package main
import (
"crypto/md5"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"strings"
)
// putHandler serves /put. Requests whose Content-Type starts with
// "multipart/form-data" are treated as form uploads and handed to uploadFile;
// everything else is stored as a raw body by handlePost.
func putHandler(w http.ResponseWriter, r *http.Request) {
	log.Print("Proccessing file upload")
	contentType := r.Header.Get("Content-Type")
	// strings.HasPrefix takes (s, prefix). A real multipart header also carries
	// "; boundary=...", so the header value must be the string being searched,
	// not the prefix.
	if strings.HasPrefix(contentType, "multipart/form-data") {
		uploadFile(w, r)
		return
	}
	handlePost(w, r)
}
// uploadFile stores the multipart form part named "file" as blobs/<md5> and
// writes the hex md5 of its contents to the response.
//
// It answers 400 when the request is not a multipart form or has no "file"
// part, and 500 when the contents cannot be read or written, so a client never
// receives a 200 for a blob that was not stored.
func uploadFile(w http.ResponseWriter, r *http.Request) {
	log.Print("Processing file upload")
	// 10 MiB is the amount of the form kept in memory, not an upload limit.
	if err := r.ParseMultipartForm(10 << 20); err != nil {
		log.Print(err)
		http.Error(w, "Error parsing multipart form", http.StatusBadRequest)
		return
	}
	file, handler, err := r.FormFile("file")
	if err != nil {
		log.Print("Error Retrieving the File: ", err)
		http.Error(w, "Error Retrieving the File", http.StatusBadRequest)
		return
	}
	defer file.Close()
	log.Printf("Uploaded File: %+v\n", handler.Filename)
	log.Printf("Content type : %+v\n", handler.Header.Get("Content-Type"))
	log.Printf("File Size : %+v\n", handler.Size)
	log.Printf("MIME Header : %+v\n", handler.Header)

	// The whole upload is read into memory so it can be hashed before it is
	// written under its final name.
	fileBytes, err := ioutil.ReadAll(file)
	if err != nil {
		log.Print(err)
		http.Error(w, "Error reading the file", http.StatusInternalServerError)
		return
	}
	md5string := fmt.Sprintf("%x", md5.Sum(fileBytes))
	if err := ioutil.WriteFile(fmt.Sprintf("blobs/%s", md5string), fileBytes, 0666); err != nil {
		log.Print(err)
		http.Error(w, "Error storing the file", http.StatusInternalServerError)
		return
	}
	// Return the key
	fmt.Fprintf(w, "%s", md5string)
}
// handlePost stores the raw request body as blobs/<md5> and writes the hex md5
// of the body to the response.
//
// It answers 400 when the body cannot be read and 500 when the blob cannot be
// written, instead of returning a key for data that was never stored.
func handlePost(w http.ResponseWriter, r *http.Request) {
	log.Print("Storing raw post")
	defer r.Body.Close()
	body, err := ioutil.ReadAll(r.Body)
	if err != nil {
		log.Print(err)
		http.Error(w, "Error reading request body", http.StatusBadRequest)
		return
	}
	md5string := fmt.Sprintf("%x", md5.Sum(body))
	if err := ioutil.WriteFile(fmt.Sprintf("blobs/%s", md5string), body, 0666); err != nil {
		log.Print(err)
		http.Error(w, "Error storing the blob", http.StatusInternalServerError)
		return
	}
	// Return the key
	fmt.Fprintf(w, "%s", md5string)
}
// mkdir_p creates dir, including any missing parents, when os.Stat reports
// that it does not exist. If creation fails the process exits via log.Fatal.
// Any other stat outcome (the path exists, or stat failed for a different
// reason) leaves the filesystem untouched.
func mkdir_p(dir string) {
	if _, err := os.Stat(dir); !os.IsNotExist(err) {
		return
	}
	log.Print("Creating ", dir)
	if err := os.MkdirAll(dir, 0755); err != nil {
		// Report the MkdirAll failure itself, not the earlier "not exist"
		// stat error.
		log.Fatal(err)
	}
}
// main makes sure the blobs directory exists, serves its contents under /get/,
// accepts uploads on /put, and listens on port 8080.
func main() {
	mkdir_p("blobs")

	blobFiles := http.FileServer(http.Dir("./blobs"))
	http.Handle("/get/", http.StripPrefix("/get/", blobFiles))
	http.HandleFunc("/put", putHandler)

	log.Print("Starting server on port 8080")
	err := http.ListenAndServe(":8080", nil)
	if err != nil {
		log.Fatal(err)
	}
}
Server code orig
In simple/server.go
:
package main
import (
"crypto/md5"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
)
// uploadFile stores the multipart form part named "file" as blobs/<md5> and
// writes the hex md5 of its contents to the response.
//
// It answers 400 when the request is not a multipart form or has no "file"
// part, and 500 when the contents cannot be read or written, so a client never
// receives a 200 for a blob that was not stored.
func uploadFile(w http.ResponseWriter, r *http.Request) {
	log.Print("Proccessing file upload")
	// 10 MiB is the amount of the form kept in memory, not an upload limit.
	if err := r.ParseMultipartForm(10 << 20); err != nil {
		log.Print(err)
		http.Error(w, "Error parsing multipart form", http.StatusBadRequest)
		return
	}
	file, handler, err := r.FormFile("file")
	if err != nil {
		log.Print("Error Retrieving the File: ", err)
		http.Error(w, "Error Retrieving the File", http.StatusBadRequest)
		return
	}
	defer file.Close()
	log.Printf("Uploaded File: %+v\n", handler.Filename)
	log.Printf("Content type : %+v\n", handler.Header.Get("Content-Type"))
	log.Printf("File Size : %+v\n", handler.Size)
	log.Printf("MIME Header : %+v\n", handler.Header)

	// The whole upload is read into memory so it can be hashed before it is
	// written under its final name.
	fileBytes, err := ioutil.ReadAll(file)
	if err != nil {
		log.Print(err)
		http.Error(w, "Error reading the file", http.StatusInternalServerError)
		return
	}
	md5string := fmt.Sprintf("%x", md5.Sum(fileBytes))
	if err := ioutil.WriteFile(fmt.Sprintf("blobs/%s", md5string), fileBytes, 0666); err != nil {
		log.Print(err)
		http.Error(w, "Error storing the file", http.StatusInternalServerError)
		return
	}
	// Return the key
	fmt.Fprintf(w, "%s", md5string)
}
// mkdir_p creates dir, including any missing parents, when os.Stat reports
// that it does not exist. If creation fails the process exits via log.Fatal.
// Any other stat outcome (the path exists, or stat failed for a different
// reason) leaves the filesystem untouched.
func mkdir_p(dir string) {
	if _, err := os.Stat(dir); !os.IsNotExist(err) {
		return
	}
	log.Print("Creating ", dir)
	if err := os.MkdirAll(dir, 0755); err != nil {
		// Report the MkdirAll failure itself, not the earlier "not exist"
		// stat error.
		log.Fatal(err)
	}
}
// main makes sure the blobs directory exists, serves its contents under /get/,
// accepts multipart uploads on /put, and listens on port 8080.
func main() {
	mkdir_p("blobs")

	blobFiles := http.FileServer(http.Dir("./blobs"))
	http.Handle("/get/", http.StripPrefix("/get/", blobFiles))
	http.HandleFunc("/put", uploadFile)

	log.Print("Starting server on port 8080")
	err := http.ListenAndServe(":8080", nil)
	if err != nil {
		log.Fatal(err)
	}
}
Testing
First start it up:
go run server.go
Then upload the file using curl -F
. We need to name the parameter
file
and use the @
syntax to push the file contents.
curl -F file="@/home/wschenk/mobiledownloads/talk.pdf" http://localhost:8080/put
a03a16aa4ed93c7194c03bb3d759ba23
Which returns the hash; then we can download it:
echo KEY is ${KEY}
curl -o /tmp/talk.pdf http://localhost:8080/get/${KEY}
ls -l /tmp/talk.pdf
md5sum /tmp/talk.pdf
KEY is a03a16aa4ed93c7194c03bb3d759ba23 -rw-r--r-- 1 wschenk wschenk 2227748 Feb 16 09:19 /tmp/talk.pdf a03a16aa4ed93c7194c03bb3d759ba23 /tmp/talk.pdf
Dockerizing
First we don't want to put the blobs into our docker image, so create
a .dockerignore
:
blobs/
Then a simple Dockerfile
:
# Build stage: compile server.go with the Go toolchain image.
FROM golang:1.15.8-alpine3.13 as builder
COPY server.go .
RUN go build server.go
# Runtime stage: a plain alpine image that receives only the compiled binary.
FROM alpine:3.13
WORKDIR /app
# The built binary is taken from /go/server in the builder stage.
COPY --from=builder /go/server .
# The server listens on port 8080.
EXPOSE 8080
CMD [ "./server" ]
And then build it:
docker build . -t simpleblobserver
And run it
docker run -it --rm -p 8080:8080 simpleblobserver
NGINX uploader
This works fine, but it also requires loading everything into memory.
We can use nginx and the nginx-upload-module
to have the webserver
stream it directly to disk, and once this is done it will call our
handler which will move it over to the blobs
directory. This module
also computes the md5
for us, so that's nice and easy. But setting it
up is more complicated, and we'll need to use docker-compose.yml
to
wire everything together.
- docker-compose.yml to wire it all together
- nginx
  - Dockerfile
  - default.conf to configure the module
- go mover
  - Dockerfile
  - mover code
Let's go!
docker-compose.yml
We'll define two services, which share a file system at /blobs
# Two services, nginx and mover, that both bind-mount the host's ./blobs
# directory at /blobs.
version: "3.7"
services:
# nginx: built from Dockerfile.nginx; host port 8080 maps to container port 80.
nginx:
build:
context: .
dockerfile: Dockerfile.nginx
volumes:
- type: bind
source: ./blobs
target: /blobs
ports:
- "8080:80"
# mover: built from Dockerfile.mover; host port 9090 maps to container port 8080.
mover:
build:
context: .
dockerfile: Dockerfile.mover
volumes:
- type: bind
source: ./blobs
target: /blobs
ports:
- "9090:8080"
nginx Dockerfile
First we create a Dockerfile.nginx
to download the source for both
nginx
and nginx-upload-module
, build them, and add the module to the main
nginx.conf
file:
# Builder stage: compile nginx-upload-module as a dynamic module against the
# nginx sources matching ${NGINX_VERSION}.
FROM nginx:1.19.6-alpine AS builder
WORKDIR /usr/src
# For latest build deps, see https://github.com/nginxinc/docker-nginx/blob/master/mainline/alpine/Dockerfile
RUN apk add --no-cache --virtual .build-deps \
gcc \
libc-dev \
make \
openssl-dev \
pcre-dev \
zlib-dev \
linux-headers \
curl \
gnupg \
libxslt-dev \
gd-dev \
geoip-dev \
git
# Download sources
RUN wget "http://nginx.org/download/nginx-${NGINX_VERSION}.tar.gz" -O nginx.tar.gz
RUN git clone --depth 1 https://github.com/vkholodkov/nginx-upload-module
# Reuse same cli arguments as the nginx:alpine image used to build
# NOTE(review): CONFARGS is written as an assignment prefix on the tar command
# (there is no && after it), so it appears to be visible only to tar and
# $CONFARGS would expand empty for ./configure. The final stage copies the
# module from /usr/local/nginx/modules, so confirm the install path before
# changing this.
RUN CONFARGS=$(nginx -V 2>&1 | sed -n -e 's/^.*arguments: //p') \
tar -zxC /usr/src -f nginx.tar.gz && \
MODDIR="$(pwd)/nginx-upload-module" && \
cd /usr/src/nginx-$NGINX_VERSION && \
./configure --with-compat $CONFARGS --add-dynamic-module=$MODDIR && \
make && make install
# Runtime stage: stock nginx image plus the compiled upload module.
FROM nginx:1.19.6-alpine
# Add the module to the main nginx configuration
COPY --from=builder /usr/local/nginx/modules/ngx_http_upload_module.so /usr/local/nginx/modules/ngx_http_upload_module.so
# Prepend the load_module directive to the existing nginx.conf.
RUN echo -e "load_module /usr/local/nginx/modules/ngx_http_upload_module.so;\n$(cat /etc/nginx/nginx.conf)" > /etc/nginx/nginx.conf
COPY default.conf /etc/nginx/conf.d/default.conf
EXPOSE 80
STOPSIGNAL SIGTERM
CMD ["nginx", "-g", "daemon off;"]
nginx default.config
Couple of things of note in this default.conf
file:
client_max_body_size | set to 2 gigs |
/get | serves from /blobs directly |
/put | Stores stuff into /blobs/upload and calls /mover on success |
error 415 | just post to /mover |
nginx | computes the md5 hash |
# Single server on port 80: /get serves stored blobs, /put receives uploads via
# the upload module, /mover proxies to the Go mover service.
server {
listen 80;
server_name localhost;
# Allow request bodies of up to 2000 MB.
client_max_body_size 2000m;
#charset koi8-r;
#access_log /var/log/nginx/host.access.log main;
location / {
root /usr/share/nginx/html;
index index.html index.htm;
}
#error_page 404 /404.html;
# redirect server error pages to the static page /50x.html
#
error_page 500 502 503 504 /50x.html;
location = /50x.html {
root /usr/share/nginx/html;
}
# Strip the /get/ prefix and serve the remaining path from /blobs.
location /get {
rewrite /get/(.*) /$1 break;
root /blobs;
}
location /put {
# On a 415 response, hand the request to /mover instead.
error_page 415 = /mover;
# Pass altered request body to this location
upload_pass /mover;
# Store files to this directory
# The directory is hashed, subdirectories 0 1 2 3 4 5 6 7 8 9 should exist
upload_store /blobs/upload 1;
# Allow uploaded files to be read only by user
upload_store_access user:r;
# Set specified fields in request body
upload_set_form_field "${upload_field_name}_name" $upload_file_name;
upload_set_form_field "${upload_field_name}_content_type" $upload_content_type;
upload_set_form_field "${upload_field_name}_path" $upload_tmp_path;
# Inform backend about hash and size of a file
upload_aggregate_form_field "${upload_field_name}_md5" $upload_file_md5;
upload_aggregate_form_field "${upload_field_name}_size" $upload_file_size;
# Only form fields whose names match this pattern are passed through.
upload_pass_form_field "^submit$|^description$";
}
# Forward to the mover service on port 8080.
location /mover {
proxy_pass http://mover:8080;
}
}
mover Dockerfile.mover
This is a simple dockerfile that builds our go binary, and then just copies it over.
# Build stage: compile mover.go with the Go toolchain image.
FROM golang:1.15.8-alpine3.13 as builder
COPY mover.go .
RUN go build mover.go
# Runtime stage: a plain alpine image that receives only the compiled binary.
FROM alpine:3.13
WORKDIR /app
# The built binary is taken from /go/mover in the builder stage.
COPY --from=builder /go/mover .
# The mover listens on port 8080.
EXPOSE 8080
CMD [ "./mover" ]
mover go code
All this really is doing is to look at the header and move the file around to the right path.
package main
import (
"crypto/md5"
"fmt"
"io/ioutil"
"log"
"net/http"
"net/http/httputil"
"os"
"strings"
)
// formHandler logs the request's Content-Type and dispatches on it: multipart
// form posts go to moveFile, anything else is stored by saveFile.
func formHandler(w http.ResponseWriter, r *http.Request) {
	kind := r.Header.Get("Content-Type")
	log.Printf("Content-Type %s\n", kind)

	switch {
	case strings.HasPrefix(kind, "multipart/form-data"):
		moveFile(w, r)
	default:
		saveFile(w, r)
	}
}
// isHexMD5 reports whether s is exactly 32 hexadecimal characters.
func isHexMD5(s string) bool {
	if len(s) != 32 {
		return false
	}
	for i := 0; i < len(s); i++ {
		c := s[i]
		isHex := (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
		if !isHex {
			return false
		}
	}
	return true
}

// moveFile handles the multipart form that nginx posts after it has stored an
// upload. It reads the file_path and file_md5 form fields, renames the stored
// file to /blobs/<md5>, and writes the md5 to the response.
//
// Both fields end up in os.Rename, so they are validated first: file_md5 must
// be 32 hex characters and file_path must live under /blobs/ without any ".."
// element. Invalid input answers 400; a failed rename answers 500 instead of
// an empty 200.
func moveFile(w http.ResponseWriter, r *http.Request) {
	// ParseMultipartForm also parses the regular form values, so no separate
	// ParseForm call is needed.
	if err := r.ParseMultipartForm(10 << 20); err != nil {
		log.Print(err)
		http.Error(w, fmt.Sprintf("ParseForm() err: %v", err), http.StatusBadRequest)
		return
	}
	// Save a copy of this request for debugging.
	requestDump, err := httputil.DumpRequest(r, true)
	if err != nil {
		fmt.Println(err)
	}
	fmt.Println(string(requestDump))

	log.Print("POST request successful")
	log.Printf("Filename : %s\n", r.FormValue("file_name"))
	log.Printf("Content Type : %s\n", r.FormValue("file_content_type"))
	log.Printf("MD5 : %s\n", r.FormValue("file_md5"))
	log.Printf("Size : %s\n", r.FormValue("file_size"))
	log.Printf("Path : %s\n", r.FormValue("file_path"))

	// Named key rather than md5 so the crypto/md5 package is not shadowed.
	key := r.FormValue("file_md5")
	if !isHexMD5(key) {
		http.Error(w, "invalid file_md5", http.StatusBadRequest)
		return
	}
	src := r.FormValue("file_path")
	if !strings.HasPrefix(src, "/blobs/") || strings.Contains(src, "..") {
		http.Error(w, "invalid file_path", http.StatusBadRequest)
		return
	}

	if err := os.Rename(src, fmt.Sprintf("/blobs/%s", key)); err != nil {
		log.Print(err)
		http.Error(w, "could not store blob", http.StatusInternalServerError)
		return
	}
	fmt.Fprintf(w, "%s", key)
}
// saveFile stores the raw request body as /blobs/<md5> and writes the hex md5
// of the body to the response.
//
// The destination is the absolute /blobs directory, the same one moveFile
// renames into. It answers 400 when the body cannot be read and 500 when the
// blob cannot be written, rather than returning a key for data that was not
// stored.
func saveFile(w http.ResponseWriter, r *http.Request) {
	log.Print("Storing raw post")
	defer r.Body.Close()
	body, err := ioutil.ReadAll(r.Body)
	if err != nil {
		log.Print(err)
		http.Error(w, "Error reading request body", http.StatusBadRequest)
		return
	}
	md5string := fmt.Sprintf("%x", md5.Sum(body))
	if err := ioutil.WriteFile(fmt.Sprintf("/blobs/%s", md5string), body, 0666); err != nil {
		log.Print(err)
		http.Error(w, "Error storing the blob", http.StatusInternalServerError)
		return
	}
	// Return the key
	fmt.Fprintf(w, "%s", md5string)
}
// main registers formHandler for both "/" and "/mover" and listens on port
// 8080.
func main() {
	for _, route := range []string{"/", "/mover"} {
		http.HandleFunc(route, formHandler)
	}

	log.Print("Starting server at port 8080")
	err := http.ListenAndServe(":8080", nil)
	if err != nil {
		log.Fatal(err)
	}
}
Setup
We don't really need this, but it's a good idea to make sure that the blobs don't go over as part of the build.
.dockerignore
:
blobs/
Let's create the blobs
folder, the upload
subdirectories, and make sure
that docker can read and write them:
mkdir -p blobs/upload/{0..9}
chmod -R 777 blobs
Then start it all up with:
docker-compose up
Testing
curl -F file="@/home/wschenk/mobiledownloads/talk.pdf" http://localhost:8080/put
a03a16aa4ed93c7194c03bb3d759ba23
Which returns the hash; then we can download it:
echo KEY is ${KEY}
curl -o /tmp/talk.pdf http://localhost:8080/get/${KEY}
ls -l /tmp/talk.pdf
md5sum /tmp/talk.pdf
KEY is a03a16aa4ed93c7194c03bb3d759ba23 -rw-r--r-- 1 wschenk wschenk 2227748 Feb 16 11:31 /tmp/talk.pdf a03a16aa4ed93c7194c03bb3d759ba23 /tmp/talk.pdf
Client examples
Bash posting data
curl -d "This is my string" http://localhost:8080/put
c2a9ce57e8df081b4baad80d81868bbb
Bash posting file
We've already seen this:
curl -F file="@/home/wschenk/mobiledownloads/talk.pdf" http://localhost:8080/put
a03a16aa4ed93c7194c03bb3d759ba23
Ruby posting data
client/ruby_data.rb
:
require 'net/http'

# POST the raw string as the request body and print the key the server returns.
endpoint = URI('http://localhost:8080/put')
response = Net::HTTP.post(endpoint, 'This is my string')
puts response.body
c2a9ce57e8df081b4baad80d81868bbb
Ruby posting data as a file
client/ruby_data_as_file.rb
:
require 'net/http'

# Posts +data+ to +host+ as a multipart form upload in the "file" field
# (with the filename "test") and returns the response body, i.e. the blob key.
def write_string_blob( host, data )
  uri = URI(host)
  req = Net::HTTP::Post.new( uri.path )
  # Send the caller's data rather than a hard-coded string.
  req.set_form([['file', data, {filename: 'test'}]], 'multipart/form-data')
  res = Net::HTTP.start(uri.hostname, uri.port) do |http|
    http.request(req)
  end
  res.body
end

puts write_string_blob( 'http://localhost:8080/put', 'This is my string' )
c2a9ce57e8df081b4baad80d81868bbb
Ruby posting file
client/ruby_file.rb
:
require 'net/http'

# Posts the file at path +file+ to +host+ as a multipart form upload in the
# "file" field and returns the response body, i.e. the blob key.
def write_file_blob( host, file )
  uri = URI(host)
  req = Net::HTTP::Post.new( uri.path )
  # The block form of File.open closes the handle once the request is done,
  # even if the request raises.
  File.open( file, 'rb' ) do |io|
    req.set_form([['file', io]], 'multipart/form-data')
    res = Net::HTTP.start(uri.hostname, uri.port) do |http|
      http.request(req)
    end
    res.body
  end
end

puts write_file_blob( 'http://localhost:8080/put', '/home/wschenk/mobiledownloads/talk.pdf' )
a03a16aa4ed93c7194c03bb3d759ba23
node posting data
Requires node-fetch
npm package.
client/node_string.js
:
const fetch = require('node-fetch');

// POST the raw string as the request body and print the key the server returns.
async function main() {
  const response = await fetch('http://localhost:8080/put', {
    method: 'POST',
    body: 'This is my string',
  });
  console.log(await response.text());
}

main();
node posting string as file
Requires node-fetch
and form-data
packages:
client/node_string_as_file.js
:
const fetch = require('node-fetch');
const FormData = require('form-data');

// Upload `string` as a multipart "file" part named "test" and resolve with the
// response text (the blob key). A non-ok response is thrown as-is.
async function write_string_blob(url, string) {
  const payload = new FormData();
  payload.append('file', string, { filename: 'test' });

  const response = await fetch(url, {
    method: 'POST',
    credentials: 'include',
    body: payload,
  });
  if (!response.ok) {
    throw response;
  }
  return response.text();
}

write_string_blob('http://localhost:8080/put', 'This is my string')
  .then((key) => console.log(key));
node posting file
First we need to install some libraries:
npm init -y
npm add node-fetch form-data
Then:
const fetch = require('node-fetch');
const FormData = require('form-data');
const fs = require('fs');
const path = require('path');

// Read `filename` into memory, upload it as a multipart "file" part under its
// base name, and resolve with the response text (the blob key). A non-ok
// response is thrown as-is.
async function write_file_blob(url, filename) {
  const contents = fs.readFileSync(filename);
  const payload = new FormData();
  payload.append('file', contents, { filename: path.basename(filename) });

  const response = await fetch(url, {
    method: 'POST',
    credentials: 'include',
    body: payload,
  });
  if (!response.ok) {
    throw response;
  }
  return response.text();
}

write_file_blob(
  'http://localhost:8080/put',
  '/home/wschenk/mobiledownloads/talk.pdf'
).then(
  (key) => console.log(key),
  (failure) => console.log(failure)
);
Deno posting string
client/deno_string.ts
:
// POST the raw string as the request body and print the key the server returns.
const response = await fetch('http://localhost:8080/put', {
  method: 'POST',
  body: 'This is my string',
});
console.log(await response.text());
Deno posting string as file
client/deno_string_as_file.ts
:
// Wrap the string in a Blob so it is sent as a multipart "file" part named
// "testfilename", then print the key the server returns.
const payload = new FormData();
payload.append('file', new Blob(['This is my string']), 'testfilename');

const response = await fetch('http://localhost:8080/put', {
  method: 'POST',
  body: payload,
});
console.log(await response.text());
Deno posting file
// Read the file into memory, send it as a multipart "file" part named
// "testfilename", then print the key the server returns.
const contents = await Deno.readFile('/home/wschenk/mobiledownloads/talk.pdf');

const payload = new FormData();
payload.append('file', new Blob([contents]), 'testfilename');

const response = await fetch('http://localhost:8080/put', {
  method: 'POST',
  body: payload,
});
console.log(await response.text());
go posting string
client/go_string.go
:
package main
import (
"fmt"
"io/ioutil"
"net/http"
"strings"
)
// write_string_blob POSTs message to uri as an application/octet-stream body
// and returns the response body, which the blob server uses for the key.
//
// It returns an error when the request cannot be built or sent, when the
// response body cannot be read, or when the server answers with a non-2xx
// status.
func write_string_blob(uri string, message string) (string, error) {
	req, err := http.NewRequest("POST", uri, strings.NewReader(message))
	// req is nil when NewRequest fails, so check err before touching it.
	if err != nil {
		return "", err
	}
	req.Header.Add("Content-Type", "application/octet-stream")

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	respBody, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("unexpected status %s: %s", resp.Status, respBody)
	}
	return string(respBody), nil
}
// main posts a fixed string to the local blob server and prints the returned
// key, panicking if the upload fails.
func main() {
	key, err := write_string_blob("http://localhost:8080/put", "This is my string")
	if err == nil {
		fmt.Print(key)
		return
	}
	panic(err)
}
go posting string as file
client/go_string_as_file.go
:
package main
import (
"bytes"
"fmt"
"io/ioutil"
"mime/multipart"
"net/http"
)
// write_string_as_file POSTs message to uri as a multipart form upload in the
// "file" field (with the filename "filename") and returns the response body,
// which the blob server uses for the key.
//
// It returns an error when the form or request cannot be built, when the
// request cannot be sent, when the response body cannot be read, or when the
// server answers with a non-2xx status.
func write_string_as_file(uri string, message string) (string, error) {
	body := new(bytes.Buffer)
	writer := multipart.NewWriter(body)
	part, err := writer.CreateFormFile("file", "filename")
	if err != nil {
		return "", err
	}
	if _, err := part.Write([]byte(message)); err != nil {
		return "", err
	}
	// Close writes the terminating boundary; the body is incomplete without it.
	if err := writer.Close(); err != nil {
		return "", err
	}

	req, err := http.NewRequest("POST", uri, body)
	// req is nil when NewRequest fails, so check err before touching it.
	if err != nil {
		return "", err
	}
	req.Header.Add("Content-Type", writer.FormDataContentType())

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	respBody, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("unexpected status %s: %s", resp.Status, respBody)
	}
	return string(respBody), nil
}
// main uploads a fixed string as a file to the local blob server and prints
// the returned key, panicking if the upload fails.
func main() {
	key, err := write_string_as_file("http://localhost:8080/put", "This is my string")
	if err == nil {
		fmt.Print(key)
		return
	}
	panic(err)
}
go posting file
client/go_string_as_file.go
:
package main
import (
"bytes"
"fmt"
"io/ioutil"
"mime/multipart"
"net/http"
"os"
)
// write_file_blob POSTs the file at path to uri as a multipart form upload in
// the "file" field, using the file's base name as the filename, and returns
// the response body, which the blob server uses for the key.
//
// The whole file is read into memory. It returns an error when the file cannot
// be opened or read, when the form or request cannot be built or sent, when
// the response body cannot be read, or when the server answers with a non-2xx
// status.
func write_file_blob(uri string, path string) (string, error) {
	file, err := os.Open(path)
	if err != nil {
		return "", err
	}
	// Deferred so the handle is released on every return path.
	defer file.Close()

	fileContents, err := ioutil.ReadAll(file)
	if err != nil {
		return "", err
	}
	fi, err := file.Stat()
	if err != nil {
		return "", err
	}

	body := new(bytes.Buffer)
	writer := multipart.NewWriter(body)
	part, err := writer.CreateFormFile("file", fi.Name())
	if err != nil {
		return "", err
	}
	if _, err := part.Write(fileContents); err != nil {
		return "", err
	}
	// Close writes the terminating boundary; the body is incomplete without it.
	if err := writer.Close(); err != nil {
		return "", err
	}

	req, err := http.NewRequest("POST", uri, body)
	// req is nil when NewRequest fails, so check err before touching it.
	if err != nil {
		return "", err
	}
	req.Header.Add("Content-Type", writer.FormDataContentType())

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	respBody, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("unexpected status %s: %s", resp.Status, respBody)
	}
	return string(respBody), nil
}
// main uploads a fixed local file to the local blob server and prints the
// returned key, panicking if the upload fails.
func main() {
	key, err := write_file_blob("http://localhost:8080/put", "/home/wschenk/mobiledownloads/talk.pdf")
	if err == nil {
		fmt.Print(key)
		return
	}
	panic(err)
}
Final thoughts
The reason that I wrote this is so that I could easily share large blobs of data between cloud functions without the overhead of installing an S3 clone or trying to jam stuff into Redis.