(*  Copyright (c) 2001 Anthony L Shipman *)

(* $Id: open_mgr.sml,v 1.19 2002/01/19 16:01:42 felix Exp $ *)

(*  This manages open files.  It blocks the caller if there are no
    more file descriptors.  It reaps open files that become garbage
    because of broken connections etc.

    There is a generic module for the logic of opening and closing
    and finalising of files. This is specialised to BinIO and TextIO
    and directories.  A file stream will be closed if the file token
    becomes garbage.

    There is a separate open file counter that allocates file descriptors
    to limit the number of simultaneous open files. We use a limit of
    1/2 of the maximum to allow slack for unknown file activity.

    Clients can just allocate file descriptors by themselves for example
    each socket connection will allocate and release a file descriptor
    to make the books balance.

    We limit the number of open files to one half of the maximum
    number of files that can be open. This allows a file descriptor
    for a connection and another for the file and one more for luck.
    But we also check to see if we have run out of file descriptors
    anyway and make the client wait until more are available.

    REVISIT - instead subtract the maximum number of connections from
    open max and then another 100 for slack.  Check that this is still
    large enough.

@#34567890123456789012345678901234567890123456789012345678901234567890
*)

(*==============================================================================*)

signature OPEN_COUNTER =
sig

    (*	This represents some number of file descriptors. It ensures
	that a release matches the allocation.
    *)
    type Allocation


    (*	The protocol consists of:

	1.  The client requests n file descriptors. The allocation will
	    be returned on the channel when available.

	2.  When the allocation is received try to open the files.

	3.  Return a success/fail/retry response to the counter. The
	    counter will block waiting for this response.  This serialises
	    all opens but this shouldn't hurt when running on a single
	    processor machine.

	    If the open was successful then go to step 4.

	    If the open failed due to insufficient file descriptors
	    because some other part of the program has taken them all
	    then the client will be queued. When another file descriptor
	    becomes available the client can continue at step 2.

	    If the open failed for some other reason then the allocation
	    will be released.

	4.  Use the files.

	5.  Release the allocation.
    *)

    datatype Response =
	    Success
	|   Fail of Allocation
	|   Retry of Allocation

    (*	Return the response on the supplied channel. *)
    type Start = Allocation * Response CML.chan

    (*	Pass in a channel to receive the start message. *)
    val request:   (int * Start CML.chan) -> unit

    (*	Release n file descriptors. *)
    val release:    Allocation -> unit

    (*	Return the number open and the number pending. *)
    val stats:	    unit -> int * int
end


structure OpenCounter: OPEN_COUNTER =
struct
    open Common

    structure Sy = SyncVar
    structure TF = TextFrag
    structure G  = Globals

(*------------------------------------------------------------------------------*)

    datatype Allocation = Allocation of int

    and Response =
	    Success
	|   Fail of Allocation
	|   Retry of Allocation

    type Start = Allocation * Response CML.chan


    (*	This is the protocol for the counter object.
    *)

    datatype CtrMsg =
	    CtrRequest of int * Start CML.chan
	|   CtrRelease of int
	|   CtrStats   of (int * int) Sy.ivar 	(* num open, num pending *)


    datatype Pending = Pending of {
	    schan:	Start CML.chan,	(* where to restart the open *)
	    alloc:	Allocation	(* how much we allocated originally *)
	    }


    (*	We only put out one of warning per second to avoid flooding
	the log. At the same time we trigger a major collection in the
	hope of collecting some lost open files via finalisation.
    *)
    datatype State = State of {
    	    in_use:	int,
	    pending:	Pending list,
	    last_warn:	Time.time option    (* time of the last warning *)
	    }


    (*	State changers. *)
    fun incr_in_use (State {in_use, pending, last_warn}) n =
    (
	State {
    	    in_use	= in_use + n,
	    pending	= pending,
	    last_warn	= last_warn
	    }
    )


    fun set_pending (State {in_use, pending, last_warn}) new =
    (
	State {
    	    in_use	= in_use,
	    pending	= new,
	    last_warn	= last_warn
	    }
    )


    fun set_last_warn (State {in_use, pending, last_warn}) now =
    (
	State {
    	    in_use	= in_use,
	    pending	= pending,
	    last_warn	= SOME now
	    }
    )

    fun format_state (State {in_use, pending, last_warn}) =
    (
	TF.L ["in_use=", Int.toString in_use,
	      " pending len=", Int.toString(length pending)
	     ]
    )

(*------------------------------------------------------------------------------*)

    fun server chan () =
    let
	val max_open = Int.div(SysWord.toInt(Posix.ProcEnv.sysconf "OPEN_MAX"), 2)

	val () = Log.inform Log.Debug (fn()=>TF.L[
		    "The open file limit will be ", Int.toString max_open]);

	(*  Since we only handle one open transaction at a time we just have
	    the one response channel.
	*)
	val rchan = CML.channel()

	fun loop state =
	let
	    val _ = Log.testInform G.TestOpenMgr Log.Debug
	    	(fn()=> TF.C[TF.S "OpenMgr recvs, ", format_state state])

	    val new_state =
		case CML.recv chan of
		  CtrRequest (n, start_chan) => try_request state n start_chan

		| CtrRelease n => run_defer (incr_in_use state (~n))

		| CtrStats rvar => 
		    let
			val State {in_use, pending, ...} = state
		    in
			Sy.iPut(rvar, (in_use, length pending));
			state
		    end
	in
	    loop new_state
	end


	(*  The state must be immutable so that we can nicely recurse through
	    run_defer. To be safe from deadlock the state transitions must
	    proceed without any chance of an exception breaking them.  The
	    client must be correct.
	*)
	and try_request (state as State {in_use, ...}) n start_chan =
	let
	    val alloc = Allocation n
	in
	    if n <= max_open - in_use
	    then
	    (
		CML.send(start_chan, (alloc, rchan));

		case CML.recv rchan of
		  Success => incr_in_use state n

		| Fail _  => state		(* give up *)

		| Retry alloc => defer state alloc start_chan
	    )
	    else
	    (
		warn_max(defer state alloc start_chan)
	    )
	end




	(*  This will run the finalisation which will queue up
	    release messages to the counter. To be safe against
	    deadlock the GC is triggered from a separate thread.
	*)

	and warn_max (state as State {last_warn, ...}) : State =
	let
	    val now = Time.now()
	in
	    if last_warn = NONE orelse
	       Time.toMilliseconds(Time.-(now, valOf(last_warn))) >= 1000
	    then
	    (
		ignore(CML.spawn(fn() => (
		    Log.log Log.Warn (TF.S "OpenMgr: Too many open files");
		    SMLofNJ.Internals.GC.doGC 10
		    )))
	    )
	    else
		();

	    set_last_warn state now
	end



	(*  Queue a request to retry later after more file descriptors 
	    are released.  No matter what the size of the allocation, a release
	    of a single descriptor may be enough to make a request succeed.
	*)
	and defer (state as State {pending, ...}) alloc start_chan =
	let
	    val p = Pending {
			schan = start_chan,
			alloc = alloc
			}
	in
	    set_pending state (p::pending)
	end



	(*  Pending requests are processed in a round-robin fashion.
	*)
	and run_defer (state as State {pending, ...}) =
	(
	    case rev pending of
	      [] => state

	    | (p::rest) =>
	    let
		val Pending {schan, alloc = Allocation n} = p
		val new_state = set_pending state (rev rest)
	    in
		try_request new_state n schan
	    end
	)


    in
	loop (State {in_use = 0, pending = [], last_warn = NONE})
    end



    structure Counter = Singleton(
    			    type input    = CtrMsg CML.chan
			    val  newInput = CML.channel
			    val  object   = server
			    )


    fun request (n, schan) = CML.send(Counter.get(), CtrRequest (n, schan))

    fun release (Allocation n) = CML.send(Counter.get(), CtrRelease n)

    fun stats() =
    let
	val rvar = Sy.iVar()
    in
	CML.send(Counter.get(), CtrStats rvar);
	Sy.iGet rvar
    end

(*------------------------------------------------------------------------------*)

end


(*==============================================================================*)

(*  This is a generic open manager that is specialised on the different kinds
    of things that can be opened.
*)


(*  This specialises the generic open manager for each openable object.
*)
signature OPEN_MGR_IMPL =
sig
    val     name:   string

    type    Arg
    type    Opened
    type    Closed

    (*	This is the number of file descriptors that are needed
	by the open.
    *)
    val	num_fds:    int


    datatype Result = 
	    Success of Opened
	|   Fail		(* give up totally *)
	|   Retry		(* should try again later *)

    val	openIt:	    Arg -> Result
    val closeIt:    Opened -> Closed

end


signature OPEN_MGR =
sig
    structure Impl: OPEN_MGR_IMPL

    (*	This describes what can be opened or closed. *)
    type    Arg = Impl.Arg

    (*	This represents an open object. *)
    type    Opened = Impl.Opened

    (*	This is the type returned from a close operation. *)
    type    Closed = Impl.Closed

    (*	This is a holder for the object.  The object will be
	finalised if the caller loses its reference to the
	object.
    *)
    type    Holder

    val get:	Holder -> Opened

    (*	Open/close the object.
	This will return NONE if the open failed or was aborted.
    *)
    val openIt:	    Abort.Abort -> Arg -> Holder option
    val openIt':    Arg -> Holder option
    val closeIt:    Holder -> Closed
end



(*------------------------------------------------------------------------------*)


functor OpenMgrFn(
    structure Impl: OPEN_MGR_IMPL
    ) : OPEN_MGR =
struct
    open Common
    structure TF = TextFrag

    structure Ctr  = OpenCounter
    structure Impl = Impl

    structure Fin = FinaliseFn(
			structure Type =
			struct
			    type T = Impl.Opened * Ctr.Allocation
			    fun finalise (opn, _) = ignore(Impl.closeIt opn)
			    val name = Impl.name
			end)


    type Arg    = Impl.Arg
    type Opened = Impl.Opened
    type Closed = Impl.Closed
    type Holder = Fin.Holder


    fun openIt abort arg =
    let
	val schan = CML.channel()

	(*  We may have to try several times.

	    To be safe from deadlock there must be no possibility
	    of an exception preventing the state transitions from
	    completing. Otherwise the counter will block forever.

	    So when we abort we must leave a thread behind to finish
	    the handshaking. Trying to remove the pending request from
	    the counter risks race conditions.
	*)
	fun try() =
	let
	    fun got_alloc (alloc, rchan) =
	    (
		case Impl.openIt arg of
		  Impl.Success opn => (CML.send(rchan, Ctr.Success);
				       SOME (opn, alloc))

		| Impl.Fail =>  (CML.send(rchan, Ctr.Fail alloc); NONE)

		| Impl.Retry => (CML.send(rchan, Ctr.Retry alloc); try())
	    )
	    handle _ => (CML.send(rchan, Ctr.Fail alloc); NONE)


	    fun got_abort() =
	    let
		fun dummy() =
		let
		    val (alloc, rchan) = CML.recv schan
		in
		    CML.send(rchan, Ctr.Fail alloc)
		end
	    in
		CML.spawn dummy;
		NONE
	    end
	in
	    CML.select[
		CML.wrap(CML.recvEvt schan, got_alloc),
		CML.wrap(Abort.evt abort, got_abort)
		]
	end
    in
	(*  Start trying *)
	Ctr.request (Impl.num_fds, schan);

	(*  Once opened, set up a finaliser on the Opened value. *)
	case try() of
	  NONE     => NONE
	| SOME farg => SOME(Fin.add farg)
    end


    fun openIt' arg = openIt (Abort.never()) arg


    fun closeIt holder =
    let
	val (opn, alloc) = Fin.get holder
    in
	Fin.remove holder;
	(Impl.closeIt opn) before (Ctr.release alloc)
    end



    fun get holder = #1(Fin.get holder)

end

(*==============================================================================*)

(*  Some common openers. *)

(*  Read binary files. *)

local
    structure E  = Posix.Error

    structure Impl =
    struct
	val	name = "BinIOReader"
	type    Arg = string
	type    Opened = BinIO.instream
	type    Closed = unit

	val	num_fds = 1

	datatype Result = 
		Success of Opened
	    |   Fail
	    |   Retry


	fun openIt file =
	(
	    Success(BinIO.openIn file)
	)
	handle
	  x as IO.Io {cause = OS.SysErr (_, SOME err), ...} =>
	(
	    if err = E.mfile orelse err = E.nfile
	    then
		Retry
	    else
	    (
		Log.logExn x;	(* a real error *)
		Fail
	    )
	)
	| x => (Log.logExn x; Fail)
	

	fun closeIt strm =
	(
	    BinIO.closeIn strm
	)
	handle x => Log.logExn x

    end
in
    structure BinIOReader = OpenMgrFn(structure Impl = Impl)
end


(*==============================================================================*)

(*  Write binary files. *)

local
    structure E  = Posix.Error

    structure Impl =
    struct
	val	name = "BinIOWriter"
	type    Arg = string
	type    Opened = BinIO.outstream
	type    Closed = unit

	val	num_fds = 1

	datatype Result = 
		Success of Opened
	    |   Fail
	    |   Retry


	fun openIt file =
	(
	    Success(BinIO.openOut file)
	)
	handle
	  x as IO.Io {cause = OS.SysErr (_, SOME err), ...} =>
	(
	    if err = E.mfile orelse err = E.nfile
	    then
		Retry
	    else
	    (
		Log.logExn x;	(* a real error *)
		Fail
	    )
	)
	| x => (Log.logExn x; Fail)
	

	fun closeIt strm =
	(
	    BinIO.closeOut strm
	)
	handle x => Log.logExn x

    end
in
    structure BinIOWriter = OpenMgrFn(structure Impl = Impl)
end


(*==============================================================================*)

(*  Read text files. *)

local
    structure E  = Posix.Error
    structure TF = TextFrag

    structure Impl =
    struct
	val	name = "TextIOReader"
	type    Arg = string
	type    Opened = TextIO.instream
	type    Closed = unit

	val	num_fds = 1

	datatype Result = 
		Success of Opened
	    |   Fail
	    |   Retry


	fun openIt file =
	(
	    Success(TextIO.openIn file)
	)
	handle
	  x as IO.Io {cause = OS.SysErr (_, SOME err), ...} =>
	(
	    if err = E.mfile orelse err = E.nfile
	    then
		Retry
	    else
	    (
		Log.logExn x;	(* a real error *)
		Fail
	    )
	)
	| x => (Log.logExn x; Fail)
	

	fun closeIt strm =
	(
	    TextIO.closeIn strm
	)
	handle x => Log.logExn x

    end
in
    structure TextIOReader = OpenMgrFn(structure Impl = Impl)
end

(*==============================================================================*)

(*  Read directory files. *)

local
    structure E  = Posix.Error

    structure Impl =
    struct
	val	name = "DirReader"
	type    Arg = string
	type    Opened = OS.FileSys.dirstream
	type    Closed = unit

	val	num_fds = 1

	datatype Result = 
		Success of Opened
	    |   Fail
	    |   Retry


	fun openIt file =
	(
	    Success(OS.FileSys.openDir file)
	)
	handle
	  x as IO.Io {cause = OS.SysErr (_, SOME err), ...} =>
	(
	    if err = E.mfile orelse err = E.nfile
	    then
		Retry
	    else
	    (
		Log.logExn x;	(* a real error *)
		Fail
	    )
	)
	| x => (Log.logExn x; Fail)
	

	fun closeIt strm =
	(
	    OS.FileSys.closeDir strm
	)
	handle x => Log.logExn x

    end
in
    structure DirReader = OpenMgrFn(structure Impl = Impl)
end

(*==============================================================================*)

(*  Read from child processes. *)

local
    structure E  = Posix.Error
    structure PP = Posix.Process

    structure Impl =
    struct
	val	name = "ExecReader"
	type    Arg = string * string list * string list
	type    Opened = Unix.proc * string (* includes the script path *)
	type    Closed = int		(* the exit status *)

	(*  Four are used temporarily while opening and then 2 are closed
	    but we don't bother releasing 2.
	*)
	val	num_fds = 4

	datatype Result = 
		Success of Opened
	    |   Fail
	    |   Retry

	val bad_status = 128



	fun openIt (args as (path, _, _)) =
	(
	    Success(Unix.executeInEnv args, path)
	)
	handle
	  x as IO.Io {cause = OS.SysErr (_, SOME err), ...} =>
	(
	    if err = E.mfile orelse err = E.nfile
	    then
		Retry
	    else
	    (
		Log.logExn x;	(* a real error *)
		Fail
	    )
	)
	| x => (Log.logExn x; Fail)
	

	(*  This will wait for up to a second for the child to terminate.
	    If it doesn't it kills it and returns.  The reap will happen
	    asynchronously.
	    Errors are logged.
	*)
	fun closeIt (proc, path) =
	let
	    fun timeout() =
	    let
		val () = Unix.kill(proc, Posix.Signal.kill)
	    in
		Log.error ["Killing stuck child process ", path];
		ignore(CML.spawn(fn () => ignore(Unix.reap proc)));
		bad_status
	    end
	in
	    CML.select[
		CML.wrap(Unix.reapEvt proc, reaped proc path),
		CML.wrap(CML.timeOutEvt(Time.fromSeconds 1), timeout)
		]
	end
	handle x => (Log.logExn x; bad_status)


	and reaped proc path exit_status =
	(
	    case exit_status of
		PP.W_EXITED => 0

	    |   PP.W_EXITSTATUS w => 
		    let
			val ex = Word8.toInt w
		    in
			Log.error ["Child process ", path, " exited with ",
					Int.toString ex];
			ex
		    end

	    | PP.W_SIGNALED signal =>
		(
		    Log.error ["Child process ", path, " exited with signal ",
			SysWord.toString(Posix.Signal.toWord signal)];
		    bad_status
		)

	    | PP.W_STOPPED signal =>
		(
		    Log.error ["Child process ", path, " stopped with signal ",
			SysWord.toString(Posix.Signal.toWord signal)];
		    Unix.kill(proc, Posix.Signal.kill);
		    bad_status
		)
	)

    end
in
    structure ExecReader = OpenMgrFn(structure Impl = Impl)
end

(*==============================================================================*)
