OpenVMS Source-Code Demos

HTTP_FILE_UPLOAD_C

//================================================================================================
// title   : HTTP_FILE_UPLOAD_C_xx.c
// platform: OpenVMS-8.4 (Alpha or Itanium)
// notes   : this program is meant as a drop-in diagnostic to test problems encountered with BASIC
//	     function "ASSET_HTTP_FILE_UPLOAD_22.fun" during the port from Alpha to Itanium.
// created : 2015-10-07
// history :
// ver who when   what
// --- --- ------ --------------------------------------------------------------------------------
//  21 ... ...... 1. original BASIC function to receive uploaded files
//  22 NSR 151007 1. original C program to debug a problem with CSWS-2.2-1 (Apache for OpenVMS)
//		     during Itanium conversion; problem with the BASIC RTL or something else?
//  23 NSR 151008 1. ripped out superfluous routines (not germane to this problem)
//  24 NSR 151009 1. changed this code from a function to a stand-alone program
//		  2. passed this code to HP/HPE with "my request for support"
//  25 NSR 151016 1. found some time to continue developing this program
//     NSR 151019 1. found some time to add a little hex dump
//  26 NSR 151020 1. found some time to add data parsing
//     NSR 151021 1. found some time to add file writing
//  27 NSR 151021 1. started work on strnstr for obvious reasons
//     NSR 151022 1. wasted time locating a childish cut-n-paste bug
//  28 NSR 151023 1. ripped out some debug crud
//		  2. reduced programmed delays
//		  3. now do a better job of prepping the file name
//		  4. note: this program is still only for demonstration use
//     NSR 151127 5. HP/HPE reported that they have a patch for me to test
//  29 NSR 151208 1. started changes required for production use as a function
//	   151210 1. found some time to continue this saga
//  30 NSR 151211 1. started work on a potential end-of-data problem			bf_30.1
//		     (this was something that popped into my head while on vacation last week)
//  31 NSR 151214 0. renamed from "c_http_file_upload_30.c" to "http_file_upload_c_31.c"
//		  1. a few tweaks as I test this for production use as a function
//	   151215 2. rewrote the boundary skew logic (previous code broke with jquery-based AJAX)
//================================================================================================
#define PROGRAM_VER		"http_file_upload_c_31.1"			// only used in Apache logging
#define	FUNCTION		1						// program=0, function=1
#define	__NEW_STARLET		1						//
#define	NSR_DISPLAY_HEX_DATA	0						// 0=off, 1=on
#define	SMALL_STRING		255						//
#define	LARGE_STRING		32700						//
#include <stdio.h>								//
#include <stdlib.h>								//
#include <string.h>								//
#include <ctype.h>								//
#include <errno.h>
//
//	OpenVMS specific stuff
//
#include <descrip.h>								// for vms string descriptors in C
#include <str$routines.h>							// for vms string descriptors in VMS
#include <ssdef.h>								//
#include <lib$routines.h>							//
#include <libwaitdef.h>								//
//
//	VMSIFY
//      a macro for use in the VMS world (VMS strings employ this structure)
//	notes:	this macro can be used to pass VMS strings down to lower modules
//		the $DESCRIPTOR macro does something similar employing sizeof-1
//		this macro combines two operations
//
//	VMSIFY(a,b): fill string descriptor 'a' (class S, type T) with a freshly
//	malloc'd copy of the NUL-terminated C string 'b' (the NUL is not copied).
//	- the caller owns a.dsc$a_pointer and should free() it when finished
//	- if malloc fails the descriptor is left as a zero-length string with a
//	  NULL pointer instead of writing through NULL
//	- the length is clamped to 65535 because dsc$w_length is a 16-bit word
//	- wrapped in do/while(0) so "VMSIFY(x,y);" is a single statement that is
//	  safe inside if/else
#define VMSIFY(a,b) do {						\
    const char *vmsify_src_ = (b);					\
    size_t vmsify_len_ = strlen(vmsify_src_);				\
    if (vmsify_len_ > 65535)						\
	vmsify_len_ = 65535;						\
    (a).dsc$b_dtype = DSC$K_DTYPE_T;					\
    (a).dsc$b_class = DSC$K_CLASS_S;					\
    (a).dsc$a_pointer = (char *) malloc(vmsify_len_ ? vmsify_len_ : 1);	\
    if ((a).dsc$a_pointer == NULL)					\
	vmsify_len_ = 0;						\
    else								\
	memcpy((a).dsc$a_pointer, vmsify_src_, vmsify_len_);		\
    (a).dsc$w_length = (unsigned short) vmsify_len_;			\
} while (0)

//------------------------------------------------------------------------------
//	delay()
//	a quick hack to introduce a programmed delay
//------------------------------------------------------------------------------
//	pick the LIB$WAIT float-type code that matches the floating point format
//	this module was compiled with (the float argument is passed by reference)
#if __G_FLOAT != 0
#  define FLOAT_TYPE LIB$K_VAX_F
#elif __D_FLOAT != 0
#  define FLOAT_TYPE LIB$K_VAX_D
#elif __IEEE_FLOAT != 0
#  define FLOAT_TYPE LIB$K_IEEE_S
#else
#  error "Try specifying a floating point qualifier on the compile"
#endif
//
//	delay(): call lib$wait with 'delay_time' (callers in this file pass
//	seconds, e.g. 0.500 for 500 mS); the lib$wait status is discarded
//
void delay(float delay_time){
    unsigned long wait_float_kind = FLOAT_TYPE;

    (void) lib$wait(&delay_time, 0, &wait_float_kind);
}
void dualout_sl(char*,long);
void dualout_s(char*);
#if (NSR_DISPLAY_HEX_DATA==1)
//------------------------------------------------------------------------------
//	hexdisplay()
//	just a quick hack so I can debug this interface
//------------------------------------------------------------------------------
//	hexdisplay(): write 'count' bytes starting at 'p' to stdout, 16 per row.
//	Each row is: offset in hex, the bytes in hex, then the same bytes as
//	characters ('.' stands in for anything outside printable ASCII 32..126).
//	Bytes are read as unsigned char so values >= 0x80 display correctly.
void hexdisplay(char *p, long count){
    printf("-i-hex data dump:\n");
    for (long j=0; j<count; j=j+16){
	printf("%4lx: ",(unsigned long)j);
	for (long i=0; i<16; i++){				// hex columns
	    if ((i+j)<count)
		printf("%2x ",(unsigned int)(unsigned char)p[i+j]);
	    else
		printf("   ");					// pad a short final row
	}
	for (long i=0; i<16; i++){				// character columns
	    if ((i+j)<count){
		unsigned char z = (unsigned char)p[i+j];
		if ((z>=32)&&(z<=126))
		    printf("%c ",z);
		else
		    printf(". ");
	    }else{
		printf("  ");
	    }
	}
	printf("\n");
    }
}
#endif
//------------------------------------------------------------------------------
//	strnstr()
//	notes:	1. similar to strstr but scans up to n bytes of s
//		2. this version doesn't stop on a NUL byte in s (NUL is treated as data)
//------------------------------------------------------------------------------
// Locate the first occurrence of the C string 'find' inside the first 'slen'
// bytes of 's'. A NUL byte in 's' does not end the scan. Returns a pointer to
// the match, 's' itself when 'find' is empty, or NULL when there is no match
// that fits completely inside the 'slen' bytes.
char * strnstr(const char *s, const char *find, size_t slen) {
	const char first = *find;

	if (first == '\0')
	    return ((char *)s);				// empty needle matches at once

	const char *rest = find + 1;			// needle without its first byte
	const size_t rest_len = strlen(rest);

	for (size_t pos = 0; pos < slen; pos++) {
	    if (s[pos] != first)
		continue;
	    if (rest_len > slen - pos - 1)		// remainder can no longer fit
		return (NULL);
	    if (strncmp(s + pos + 1, rest, rest_len) == 0)
		return ((char *)(s + pos));
	}
	return (NULL);
}
//------------------------------------------------------------------------------
//	my_fread()
//	note: conceived at the gym, this hack is more efficient than fread()
//------------------------------------------------------------------------------
// Read up to 'limit' bytes from 'stream' into 'buffer', one getc() at a time,
// stopping early when getc() returns EOF. 'unit' is accepted only so the call
// looks like fread(); it is not used. Returns the number of bytes stored.
long 	my_fread(char* buffer, long unit, long limit, FILE *stream){
	long stored = 0;

	(void) unit;
	while (stored < limit) {
	    int ch = getc(stream);
	    if (ch == EOF)
		break;
	    buffer[stored] = ch;
	    stored++;
	}
	return stored;
}

long gDebug;									// needs to be global
//------------------------------------------------------------------------------
//	dualout
//------------------------------------------------------------------------------
// Write the plain message 'one' to stderr and, when gDebug > 0, to stdout too.
// The text is emitted verbatim (never interpreted as a printf format), so a
// stray '%' in the message cannot corrupt the output.
void dualout_s(char *one){
	fputs(one, stderr);							// goes to Apache logs
	if (gDebug>0)
	    fputs(one, stdout);							// goes to user
}
// printf-style output with one string argument: 'one' is the format and 'two'
// its argument. Always written to stderr; repeated on stdout when gDebug > 0.
void dualout_ss(char *one, char *two){
	fprintf(stderr, one, two);
	if (gDebug <= 0)
	    return;
	printf(one, two);
}
// printf-style output with one long argument: 'one' is the format and 'two'
// its argument. Always written to stderr; repeated on stdout when gDebug > 0.
void dualout_sl(char *one, long two){
	fprintf(stderr, one, two);
	if (gDebug <= 0)
	    return;
	printf(one, two);
}
// printf-style output with a long followed by a string: 'one' is the format,
// 'two' and 'three' its arguments. Always written to stderr; repeated on
// stdout when gDebug > 0.
void dualout_sls(char *one, long two, char *three){
	fprintf(stderr, one, two, three);
	if (gDebug <= 0)
	    return;
	printf(one, two, three);
}

//==============================================================================
//	main()
//==============================================================================
//------------------------------------------------------------------------------
//	Receive one file sent as an HTTP "multipart/form-data" POST.
//
//	inputs (environment):	REQUEST_METHOD		must be POST
//				CONTENT_TYPE		must carry "boundary="
//				CONTENT_LENGTH		total bytes expected
//				CGI$UPLOAD_DIRECTORY	optional target directory
//	input  (stream):	"APACHE$INPUT"		the request body
//	output:			directory + sanitized, upper-cased filename
//
//	The body is read in gulps of at most LARGE_STRING bytes. The headers of
//	the first part (filename, Content-Type, start-of-data) are parsed from
//	the first gulp that holds data; everything from start-of-data up to the
//	closing boundary is written to the output file.
//
//	returns: program  build (FUNCTION==0): always 1
//		 function build (FUNCTION==1): 2 when the upload failed, otherwise
//		 the status of str$copy_dx (which hands the filename back through
//		 p2_filename)
//------------------------------------------------------------------------------
#if (FUNCTION==0)
    //---------------------------------------------------------------
    //	DEVELOPMENT PROGRAM
    //	a stand-alone program (for testing and maintenance)
    //---------------------------------------------------------------
    long main(long argc, char *argv[]) {
#else
    //---------------------------------------------------------------
    //	PRODUCTION FUNCTION
    //  a long function called from BASIC
    //
    //	BASIC declaration:
    //	    external long function HTTP_FILE_UPLOAD_C(	&
    //		string directory$,			&
    //		string filename$,			&
    //		long debug)
    //  notes: p1_directory	might be specified or blank
    //         p3_debug		might be 0-3
    //         p2_filename	write back the actual filename
    //---------------------------------------------------------------
    long http_file_upload_c(
	struct dsc$descriptor_d *p1_directory,
	struct dsc$descriptor_d *p2_filename,
	long			*p3_debug) {
#endif
    long ContentLength	= 0;							// caveat: 2G limit for now
    long temp		= 0;							//
    char *sod		= NULL;							// start-of-data
    char *eod		= NULL;							// end-of-data
    char *p		= NULL;							//
    char *pRequestMethod = NULL;						//
    char *pContentType1	= NULL;							//
    char *pDirectory	= NULL;							// directory text actually used
    char *pDirAlloc	= NULL;							// malloc'd copy of p1_directory (if any)
    FILE *fd_in		= NULL;							// NULL so exit_common can test it
    FILE *fd_out	= NULL;							// NULL so exit_common can test it
    long rc		= 0;							//
    long bytes_r	= 0;							//
    long total_bytes_r	= 0;							// caveat: 2G limit for now
    long remaining	= 0;							//
    long bytes_w	= 0;							//
    long total_bytes_w	= 0;							//
    long max_gulp	= 0;							//
    long zero_counter   = 0;							//
    long read_count     = 0;							//
    long boundary_skew  = 0;							// for Chrome and Firefox
    long text_file	= 0;							// init to 'not text'
    long header_done	= 0;							// 1 once the part headers were parsed
    int  n;									// snprintf result
    char buffer0[LARGE_STRING+1];						// +1 leaves room for a NUL terminator
    char filename[SMALL_STRING+1];						//
    char directory[SMALL_STRING+1];						//
    char boundary[SMALL_STRING+1];						//
    char scratch1[SMALL_STRING+1];						//
    char contentType2[SMALL_STRING+1];						//
    char fqfs[SMALL_STRING+SMALL_STRING];					// fully qualified file specification
    struct dsc$descriptor_s vms_misc;						//
    //--------------------------------------------------------------------------
    //	real code starts here :-)
    //--------------------------------------------------------------------------
    filename[0] = '\0';								// exit code reads these even after an early error
    fqfs[0]     = '\0';								//
#if (FUNCTION==0)								// this is a program
    gDebug = 1;									//
#else										// this is a function
    memcpy(&gDebug,p3_debug,sizeof(gDebug));					//
#endif
    if (gDebug>0){								//
	printf("Status: 200\n");						// signal okay while we debug
	printf("Cache-Control: no-store\n");					//
	printf("Content-type: text/html\n\n");					// end of HTTP header
	printf("<!DOCTYPE html>\n");						// start of payload
	printf("<html>\n<head></head>\n<body><pre>");				//
    }
#if (FUNCTION==0)
    printf("-i-program             : %s\n",argv[0]);				// display program name to user
#else
    dualout_ss("-i-program %s\n", PROGRAM_VER);					// display program name to Apache logs
    if (p1_directory->dsc$w_length>0) {						// if a directory was provided
	pDirAlloc = malloc(p1_directory->dsc$w_length+5);			//
	if (pDirAlloc==NULL) {							//
	    dualout_s("-e-insufficient memory\n");				//
	    goto error_exit;							//
	}									//
	memcpy(pDirAlloc, p1_directory->dsc$a_pointer, p1_directory->dsc$w_length);
	pDirAlloc[p1_directory->dsc$w_length] = '\0';				// descriptors are not NUL terminated
	pDirectory = pDirAlloc;							//
    }
#endif
    if (pDirectory==NULL) {							//
	pDirectory = getenv("CGI$UPLOAD_DIRECTORY");				// optional (DCL symbol or OpenVMS logical symbol)
    }										//
    //
    //	copy as data ("%s"), never as a format, and never past the buffer
    //
    n = snprintf(directory, sizeof directory, "%s",
		 (pDirectory!=NULL) ? pDirectory : "csmis$tmp:");		// default upload location
    free(pDirAlloc);								// the private copy is no longer needed
    pDirAlloc  = NULL;								//
    pDirectory = NULL;								//
    if ((n<0)||((size_t)n>=sizeof directory)) {					//
	dualout_s("-e-directory specification too long\n");			//
	goto error_exit;							//
    }
#if (FUNCTION==0)								//
    printf("-i-directory           : %s\n",directory);				//
#else										//
    dualout_ss("-i-directory %s\n", directory);					//
#endif
    //
    pRequestMethod = getenv("REQUEST_METHOD");					// set by Apache mod_cgi
    if (pRequestMethod==NULL) {							//
	dualout_s("-e-REQUEST_METHOD: NULL\n");					//
	goto error_exit;							//
    }else{									//
	if (gDebug>0)								//
	    printf("-i-REQUEST_METHOD      : %s\n",pRequestMethod);		//
    }										//
    if (strcasecmp(pRequestMethod,"POST")!=0) {					//
	dualout_ss("-e-unsupported request method: %s\n",pRequestMethod);	//
	goto error_exit;							//
    }

    pContentType1 = getenv("CONTENT_TYPE");					// set by Apache mod_cgi
    if (pContentType1==NULL) {							//
	dualout_s("-e-CONTENT_TYPE: NULL\n");					//
	goto error_exit;							//
    }
    //
    //	we only support multipart/form-data but here is what we usually see:
    //	    multipart/form-data; boundary=---------------------------24743530619118
    //
    if (strcasecmp(pContentType1,"application/x-www-form-urlencoded")==0) {	//
	dualout_ss("-e-unsupported content type: %s\n",pContentType1);		//
	goto error_exit;							//
    }
    if (gDebug>0)								//
	printf("-i-CONTENT_TYPE        : %s\n",pContentType1);			//
    //
    p = strstr(pContentType1,"boundary=");					//
    if (p==NULL){								//
	dualout_s("-e-BOUNDARY: NULL\n");					//
	goto error_exit;							//
    }else{									//
	n = snprintf(boundary, sizeof boundary, "%s", p+9);			// skip over the text (scoop up the rest)
	if ((n<0)||((size_t)n>=sizeof boundary)) {				// client supplied text: refuse, don't overflow
	    dualout_s("-e-BOUNDARY: too long\n");				//
	    goto error_exit;							//
	}
	if (gDebug>0)								//
	    printf("-i-defined boundary    : %s\n", boundary);			//
	dualout_ss("-i-defined boundary    : %s\n", boundary);			//
    }										//
    //
    p = getenv("CONTENT_LENGTH");						// set by Apache mod_cgi
    if (p!=NULL) {								//
	ContentLength = atoi(p);						// caveat: 2G limit
    } else {									//
	ContentLength = 0;							//
    }
    if (gDebug>0)								//
	printf("-i-CONTENT_LENGTH      : %ld\n",ContentLength);			//
    fd_in = fopen("APACHE$INPUT","rb");						// if this fails every read below returns zero
    delay(0.500);								// delay 500 mS b4 first read
    //--------------------------------------------------------------------------
    //	time to get down to work
    //--------------------------------------------------------------------------
    read_loop:
    //
    //	note: as this program is currently written, some transactions will split the final boundary across
    //	the second last and last block. Setting max_gulp to something a little smaller will prevent this.
    //
    remaining = ContentLength - total_bytes_r;					//					bf_30.1
    if (remaining <= LARGE_STRING)						// enough buffer space to read this?	bf_30.1
	max_gulp = LARGE_STRING;						// yes (use max buf)
    else{									//
	if ((remaining - LARGE_STRING) >= 100)					// will the next read leave at least 100?
	    max_gulp = LARGE_STRING;						// yes
	else									// no
	    max_gulp = LARGE_STRING - 100;					// so do a partial read
    }
    //
    bytes_r = 0;								// init
    read_count++;								// update
    if (fd_in) {
	bytes_r = my_fread(buffer0,1,max_gulp    ,fd_in);			//					bf_30.1
    }
    buffer0[bytes_r] = '\0';							// strstr() below needs a terminated buffer
    total_bytes_r = total_bytes_r + bytes_r;					//
    if (gDebug>0) {								//
	printf("-i-read_count          : %ld\n",read_count);			//
	printf("-i-bytes       received: %ld\n",bytes_r);			//
	printf("-i-total bytes received: %ld\n",total_bytes_r);			//
    }
    //
    //	we need to do a little prep on the first read that delivered data
    //	(an empty first read must not use up the one-time header parse)
    //
    if ((header_done==0)&&(bytes_r>0)){						// if first data
	header_done = 1;							//
	//
	//	test/repair the boundary
	//
	p = strstr(buffer0,boundary);						// locate the primary boundary in the data
	if (p==NULL){								// this should never happen
	    dualout_s("-e-could not find initial boundary\n");			//
	    goto error_exit;
	}
	boundary_skew = p-buffer0;						// did it start at zero? (maybe)
	if (gDebug>0)								//
	    printf("-i-boundary skew       : %ld\n",boundary_skew);		//
	//
	//	locate filename		IE: "c:\path\yada.txt" or "c:\yada.txt"
	//				FF: "yada.txt"
	//
	p = strstr(buffer0,"filename=\"");					//
	if (p==NULL){								//
	    dualout_s("-e-could not find filename\n");				//
	    goto error_exit;							//
	}
	p = p + 10;								// step over: filename="
	filename[0] = '\0';							//
	for (long i=0; i<SMALL_STRING; i++){					//
	    if ((p[i]=='\0')||(p[i]=='"'))					// end of buffer or closing quote
		break;								//
	    filename[i  ] = (char)toupper((unsigned char)p[i]);			//
	    filename[i+1] = '\0';						//
	}
	//
	//	IE patch: keep only what follows the last backslash
	//	(moved as data; the name is client supplied and must never be a format)
	//
	p = strrchr(filename,'\\');						// any backslash here?
	if (p!=NULL)								// yes
	    memmove(filename, p+1, strlen(p+1)+1);				//
	if (strlen(filename)<3){						//
	    dualout_s("-e-filename too small\n");				//
	    goto error_exit;							//
	}
	//
	//	remove spaces, plus signs, etc. (can't save to OpenVMS)
	//	space, tab and underscore become a dash; dot, dash and
	//	alphanumerics are kept; everything else is dropped
	//
	{
	    size_t j = 0;							//
	    for (size_t i=0; filename[i]!='\0'; i++){				//
		unsigned char ch = (unsigned char)filename[i];			//
		if ((ch==' ')||(ch=='\t')||(ch=='_'))				//
		    scratch1[j++] = '-';					// replace with dash
		else if ((ch=='.')||(ch=='-')||isalnum(ch))			//
		    scratch1[j++] = (char)ch;					// transcribe
	    }
	    scratch1[j] = '\0';							// always terminated, even if nothing survived
	}
	strcpy(filename,scratch1);						// never longer than the original
	//
	// 	ensure only one dot (scan from right to left, including the first character)
	//
	{
	    long dots = 0;							// dot counter
	    for (long i=(long)strlen(filename)-1; i>=0; i--){			//
		if (filename[i]=='.'){						//
		    dots++;							//
		    if (dots>1)							// if not the first from the end
			filename[i]='-';					// replace with dash
		}
	    }
	}
	//
	//	reduce multiple dashes to one
	//
	{
	    size_t j = 0;							//
	    long dashes = 0;							// dash counter
	    for (size_t i=0; filename[i]!='\0'; i++){				//
		if (filename[i]=='-'){						//
		    dashes++;							//
		    if (dashes>1)						// if more than one in a row
			continue;						// just ignore it
		}else{								//
		    dashes = 0;							// reset
		}
		scratch1[j++] = filename[i];					// transcribe
	    }
	    scratch1[j] = '\0';							//
	}
	strcpy(filename,scratch1);						//
	if (strlen(filename)<3){						//
	    dualout_s("-e-filename too small\n");				//
	    goto error_exit;							//
	}
	n = snprintf(fqfs, sizeof fqfs, "%s%s", directory, filename);		//
	if ((n<0)||((size_t)n>=sizeof fqfs)) {					//
	    dualout_s("-e-file specification too long\n");			//
	    goto error_exit;							//
	}
	if (gDebug>0){								//
	    printf("-i-filename            : %s\n",filename);			//
	    printf("-i-fqfs                : %s\n",fqfs);			//
	}
	//
	//	locate content-type (copy up to, but not including, the <cr>)
	//
	p = strstr(buffer0,"Content-Type:");					//
	if (p==NULL){								//
	    dualout_s("-e-could not find Content-Type\n");			//
	    goto error_exit;							//
	}
	p = p + 13;								// step over: Content-Type:
	contentType2[0] = '\0';							// init
	for (long i=0; i<SMALL_STRING; i++){					//
	    if ((p[i]=='\x0d')||(p[i]=='\0'))					// stop on <cr> or end of buffer
		break;								//
	    contentType2[i  ] = p[i];						// copy data
	    contentType2[i+1] = '\0';						// and advance the terminator
	}
	if (gDebug>0)								//
	    printf("-i-ContentType         : %s\n",contentType2);		//
	//
	//	is this a text file?
	//
	if (strstr(contentType2, "text/")!=NULL)				//
	    text_file = 1;							//
	if (strstr(filename, ".XML")!=NULL)					//
	    text_file = 1;							//
	if (strstr(filename, ".CSV")!=NULL)					//
	    text_file = 1;							//
	if (strstr(filename, ".MHT")!=NULL)					//
	    text_file = 1;							//
	if (gDebug>0)								//
	    printf("-i-text file?          : %ld\n", text_file);		//
	//
	sod = strstr(buffer0, boundary);					//
	if (sod==NULL){								//
	    dualout_s("-e-could not find the working boundary\n");		//
	    goto error_exit;							//
	}
	sod = sod + strlen(boundary);						//
	sod = strstr(sod, "\x0d\x0a\x0d\x0a");					// look for end of header
	if (sod==NULL){								//
	    dualout_s("-e-could not find start-of-data marker\n");		//
	    goto error_exit;							//
	}
	sod = sod + 4;								// this is the start-of-data
	//
	if (text_file==0){							// if not a text file
	     fd_out = fopen(fqfs,"wb","rat=none","rfm=fix","mrs=512");		// fixed block sizes
	}else{									// else a text file
	     fd_out = fopen(fqfs,"wb","rat=none","rfm=stm");			// no record attributes; stream
	}
	if (fd_out==NULL) {							//
	    dualout_sls("-e-error: %ld while opening file : %s\n",errno,fqfs);	//
	    goto error_exit;							//
	}
    }else{									// else headers already done (or nothing read)
	sod = buffer0;								// always zap to zero
    }
//------------------------------------------------------------------------------
//	process received data
//	entry:	buffer0		= full data buffer
//		bytes_r		= byte count
//		*SOD		= start-of-data
//------------------------------------------------------------------------------
#if (NSR_DISPLAY_HEX_DATA==1)
    if (gDebug>1)								// more than simple debugging
	hexdisplay(buffer0,bytes_r);						//
#endif
//------------------------------------------------------------------------------
    if (total_bytes_r != ContentLength) {					// if not complete
	if (bytes_r==0) {							// oops, nothing received
	    zero_counter++;							//
	    if (gDebug>0)							//
		printf("-i-zero_counter: %ld\n",zero_counter);			//
	    if (zero_counter < 5){						//
		delay(1.0);							// delay 1 S
		if (gDebug>0)							//
		    printf("-i-fetching more data\n");				//
		goto read_loop;							//
	    }else{								//
		dualout_s("-e-message: DATA IS MISSING\n");			//
		goto error_exit;						//
	    }
	}else{									// something received
	    temp = bytes_r - (sod - buffer0);					// bytes from start-of-data to end of gulp
	    for (bytes_w=0; bytes_w<temp; bytes_w++){				// byte-at-a-time (fwrite misbehaved here)
		if (fputc(sod[bytes_w], fd_out)==EOF){				//
		    dualout_sls("-e-error: %ld while writing file : %s\n",errno,fqfs);
		    goto error_exit;						//
		}
	    }
	    total_bytes_w = total_bytes_w + bytes_w;				//
	    if (gDebug>0){							//
		printf("-i-wrote bytes         : %ld\n",	bytes_w);	//
		printf("-i-wrote total bytes   : %ld\n",	total_bytes_w);	//
	    }
	    delay(0.010);							// delay 10 mS before next read
	    goto read_loop;
	}
    }
    //
    //	if all the bytes have been received then look for the boundary marker
    //	(scan only the bytes that really follow start-of-data in this gulp)
    //
    if (total_bytes_r == ContentLength){
	eod = strnstr(sod, boundary, (size_t)(bytes_r - (sod - buffer0)));
	if (eod==NULL){
	    dualout_s("-e-could not find ending boundary\n");
	    goto error_exit;
	}
	//
	// caveat: both Chrome and Firefox report a boundary two bytes shorter than what was used, so at this point we
	// back up skew bytes (if I was really careful I would check to make sure they are dashes). Then we need to back
	// up two more bytes to skip over an end-of-line pair (if I was really careful I would check to make sure they
	// are \x0d and \x0a)
	//
	temp = eod - sod - 2 - boundary_skew;
	if (temp>0){
	    for (bytes_w=0; bytes_w<temp; bytes_w++){
		if (fputc(sod[bytes_w], fd_out)==EOF){
		    dualout_sls("-e-error: %ld while writing file : %s\n",errno,fqfs);
		    goto error_exit;
		}
	    }
	    total_bytes_w = total_bytes_w + bytes_w;
	    if (gDebug>0){
		printf("-i-wrote bytes         : %ld\n",	bytes_w);
		printf("-i-wrote total bytes   : %ld\n",	total_bytes_w);
	    }
	}
    }
//------------------------------------------------------------------------------
    if (total_bytes_r == ContentLength){
	if (gDebug>0){
	    printf("-i-message: ALL DATA WAS RECEIVED\n");
	    printf("-i-total bytes received: %ld\n",total_bytes_r);
	    printf("-i-total bytes saved   : %ld\n",total_bytes_w);
	}
    }
    rc = 1;									// OpenVMS-s-
    goto exit_common;
    //
    error_exit:
    rc = 2;									// OpenVMS-e-
    //
    exit_common:
    free(pDirAlloc);								// no-op unless we bailed out early
    if (fd_in) {
	fclose(fd_in);
    }
    if (fd_out) {
	if ((fclose(fd_out)!=0)&&(rc==1)) {					// the final flush can fail (eg. disk full)
	    dualout_sls("-e-error: %ld while closing file : %s\n",errno,fqfs);
	    rc = 2;								// OpenVMS-e-
	}
    }
    if (gDebug>0){
	printf("-i-program is exiting\n");
	printf("</pre>\n</body>\n</html>");
    }
#if (FUNCTION==0)
    return(1);									// OpenVMS-s-
#else
    VMSIFY(vms_misc, filename);							//
    {
	long copy_rc = str$copy_dx(p2_filename, &vms_misc);			// tell function what the filename was
	free(vms_misc.dsc$a_pointer);						// str$copy_dx made its own copy
	if (rc==1)								// upload okay: report the copy status
	    rc = copy_rc;							// (an upload failure keeps rc = 2)
    }
    return(rc);									// OpenVMS-?-
#endif
}