/*
 * video_out_sdl.c
 *
 * Copyright (C) 2000-2001 Ryan C. Gordon <icculus@lokigames.com> and
 *                         Dominik Schnitzer <aeneas@linuxvideo.org>
 *                         Matt Ownby (added VLDP code)
 *
 * SDL info, source, and binaries can be found at http://www.libsdl.org/
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#include <inttypes.h>
#include "inttypesreplace.h"
#include <SDL.h>

#include "video_out.h"
#include "video_out_internal.h"
#include "video_out_sdl.h"

#include "vldp_common.h"
#include "vldp_internal.h"	// MATT

// MATT created this because Windows only wants us to have ONE Overlay, not 3
// So instead of allocating 3 overlays, we'll allocate just normal memory instead.
// Each frame owns one of these; sdl_alloc_frames() mallocs width*height bytes for Y
// and (width*height)/4 bytes each for U and V, and sdl_close() frees them.
struct overlay_replacement
{
	Uint8 *Y;	// Y channel
	Uint8 *U;	// U channel
	Uint8 *V;	// V channel
};

// One frame as seen by this driver: the generic vo_frame_t header plus the
// plain-memory YUV planes that replace a per-frame SDL overlay.
// 'vo' has to remain the first member: sdl_draw_frame() casts the vo_frame_t *
// it receives straight to sdl_frame_t *.
typedef struct sdl_frame_s {
    vo_frame_t vo;
//    SDL_Overlay * overlay;	// See above for reason to get rid of this
	struct overlay_replacement overlay_alt;
} sdl_frame_t;

// State of the SDL video output driver.
// 'vo' has to remain the first member: the code in this file casts freely
// between vo_instance_t * and sdl_instance_t *.
typedef struct sdl_instance_s {
    vo_instance_t vo;	// setup/close/get_frame callbacks are filled in by vo_sdl_open()
    int prediction_index;	// not touched in this file; presumably used by the libvo_common helpers -- TODO confirm
    vo_frame_t * frame_ptr[3];	// frame_ptr[i] points at frame[i] (set in sdl_alloc_frames)
    sdl_frame_t frame[3];	// the three frames whose YUV planes are malloc'ed in sdl_alloc_frames

    SDL_Surface * surface;	// surface given to vo_sdl_open(); the YUV overlay is created for it
    Uint32 sdlflags;	// copy of surface->flags taken in vo_sdl_open()
    Uint8 bpp;	// copy of surface->format->BitsPerPixel taken in vo_sdl_open()
} sdl_instance_t;

// The single driver instance returned by vo_sdl_open().
sdl_instance_t g_sdl_instance;	// MATT, to avoid malloc command

// get_frame callback of the driver (installed by vo_sdl_open).
// Simply forwards the instance and 'flags' to libvo_common_get_frame() and
// returns whatever frame that helper picks.
static vo_frame_t * sdl_get_frame (vo_instance_t * _instance, int flags)
{
    return (vo_frame_t *) libvo_common_get_frame (_instance, flags);
}

// copies the contents of src into dst
// assumes destination overlay is locked and *IMPORTANT* assumes src and dst are the same resolution
// thx to Benoit Miller for this fix
void buf2overlay(SDL_Overlay *dst, struct overlay_replacement *src)
{
	Uint8 *src_ptr = src->Y;   // start with Y
	Uint8 *dst_ptr = dst->pixels[0];
	Uint32 i;
	Uint32 w_half = dst->w >> 1;	// half of the overlay width, to avoid calculating this more than once
	Uint32 h_half = dst->h >> 1;	// half of the overlay height, to avoid calculating this more than once

	// copy Y layer
	for (i = 0; i < dst->h; i++)
	{
		memcpy(dst_ptr, src_ptr, dst->w);
		src_ptr += dst->w;
		dst_ptr += dst->pitches[0];
	}

	src_ptr = src->V;   // V
	dst_ptr = dst->pixels[1];
	// copy V layer
	for (i = 0; i < h_half; i++)
	{
		memcpy(dst_ptr, src_ptr, w_half);
		src_ptr += w_half;
		dst_ptr += dst->pitches[1];
	}

	src_ptr = src->U;   // U
	dst_ptr = dst->pixels[2];
	// copy U layer
	for (i = 0; i < h_half; i++)
	{
		memcpy(dst_ptr, src_ptr, w_half);
		src_ptr += w_half;
		dst_ptr += dst->pitches[2];
	}

/*
	// old code that does not take pitch into account
	Uint8 *src_ptr = src->Y;	// start with Y
	Uint8 *dst_ptr = dst->pixels[0];
	Uint32 size = dst->w * dst->h;
	memcpy(dst_ptr, src_ptr, size);
	src_ptr = src->V;	// V
	dst_ptr = dst->pixels[1];
	memcpy(dst_ptr, src_ptr, size >> 2);
	src_ptr = src->U;	// U
	dst_ptr = dst->pixels[2];
	memcpy(dst_ptr, src_ptr, size >> 2);
*/
}

// Draw callback installed on every frame (vo.draw, see sdl_alloc_frames).
//
// When no frames are to be skipped, it:
//   1. works out how many ms should have elapsed for this frame and how many actually have,
//   2. unless it is two or more frame periods late, copies the frame's YUV planes into the
//      shared hardware overlay, lets g_yuv_callback (if set) draw on it, waits until the
//      frame is due and then displays it (the display is dropped if a new command arrived),
//   3. bumps g_ack_frame (once per call) and s_frames_before_next_frame,
//   4. calls paused_handler() or play_handler(),
// and repeats all of that for as long as playback is paused, so the overlay keeps being redrawn.
// When frames are to be skipped, it only decrements s_frames_to_skip.
//
// The s_* / g_* variables and the handler functions are defined outside this file
// (presumably via vldp_internal.h / vldp_common.h -- confirm).
static void sdl_draw_frame (vo_frame_t * _frame)
{
    sdl_frame_t * frame;
    sdl_instance_t * instance;
    Sint32 correct_elapsed_ms = 0;	// MATT, we want this signed since we compare against actual_elapsed_ms
    Sint32 actual_elapsed_ms = 0;	// MATT, we want this signed because it could be negative

    frame = (sdl_frame_t *) _frame;
    instance = (sdl_instance_t *) frame->vo.instance;

	// start MATT

	// if we don't need to skip any frames
	// (both values are OR'ed together, so either one being non-zero selects the skip branch below)
	if (!(s_frames_to_skip | s_skip_all))
	{		
		int advanced = 0;	// whether we have advanced our frame counter yet

		// loop once, or more than once if we are paused
		do
		{
			// compute how much time ought to have elapsed based on our frame count
			correct_elapsed_ms = (Sint32) (s_frames_before_next_frame * g_ack_info.ms_per_frame);
			actual_elapsed_ms = SDL_GetTicks() - s_timer;

			// NOTE: when VLDP_BENCHMARK is defined the lateness test below is compiled out,
			// so every frame is drawn and no stalling takes place
#ifndef VLDP_BENCHMARK
		  // if we are caught up enough that we don't need to skip any frames, then display the frame
		  // ("caught up enough" means less than two frame periods behind schedule)
		  if (actual_elapsed_ms < (correct_elapsed_ms + (g_ack_info.ms_per_frame * 2)))
#endif
		  {

			  // we have to copy our buffer to the hardware overlay
			  // and to make changes to the overlay we must lock it
			if (SDL_LockYUVOverlay(s_hw_overlay) == 0)
			{
				buf2overlay(s_hw_overlay, &frame->overlay_alt);

				// if we need to draw some video overlay on top of our frame, then do it!
				if (g_yuv_callback)
				{
					(*g_yuv_callback)(s_hw_overlay);	// call YUV callback!
				}
				SDL_UnlockYUVOverlay(s_hw_overlay);

#ifndef VLDP_BENCHMARK
				// stall if we are playing too quickly and if we don't have a command waiting for us
				while ((Sint32) (SDL_GetTicks() - s_timer) < correct_elapsed_ms)
				{
					if (ivldp_got_new_command())
					{
						break;
					}
	    			SDL_Delay(1);	// note, if this is set to 0, we don't get commands as quickly
				}
#endif

				// if a command comes in at the last second,
				// we don't want to render the next frame that we were going to because it could cause overrun
				// so we only display the frame if we haven't received a command
				if (!ivldp_got_new_command())
				{
                                        SDL_DisplayYUVOverlay (s_hw_overlay, &(instance->surface->clip_rect));
				}

			}
			// if we were unable to lock the overlay (shouldn't happen) we can't draw overlay on the frame
			else
			{
				fprintf(stderr, "DOH! Can't lock YUV overlay, frame will not be updated this time!\n");
				SDL_Delay(1);	// sleep for 1 ms to give the overlay time to become lockable
			}

		  } // end if we don't drop any frames

/*		  
		  // else we're too far beyond so we're gonna have to drop some this frame to catch up (doh!)
		  else
		  {
			fprintf(stderr, "NOTE : dropped frame %u!  Expected %u but got %u\n",
				g_ack_frame, correct_elapsed_ms, actual_elapsed_ms);
		  }
*/

			// if we have not advanced our frame counter yet, then do so
			// the reason this is necessary is because if we are paused, we will loop and we only want to
			// advance the frame counter the first time
			if (!advanced)
			{
				g_ack_frame++;
				advanced = 1;
			}

			// now that the frame has either been displayed or skipped, we can update the counters to reflect
			// NOTE(review): unlike g_ack_frame, this is incremented on every pass of the paused loop;
			// presumably paused_handler() compensates for that -- confirm
			s_frames_before_next_frame++;

		  // if the frame is to be paused, then stall
		  if (s_paused)
		  {
			paused_handler();
		  }
		  // else if we are supposed to be playing
		  else
		  {
			play_handler();
		  }
		  
		} while (s_paused && !s_skip_all && !s_step_forward);
		// loop while we are paused so video overlay gets redrawn

		s_step_forward = 0;	// clear this in case it was set (since we have now stepped forward)

	} // end if we don't have frames to skip
	
	// if we have frames to skip
	else
	{		
		// we could skip frames for another reason
		// (s_skip_all can bring us here while s_frames_to_skip is 0, hence the check)
		if (s_frames_to_skip > 0)
		{
			s_frames_to_skip--;	// we've skipped a frame, so decrease the count
		}
	}

#ifndef VLDP_BENCHMARK
	SDL_Delay(0);	// switch to another thread just in case
#endif
	
	// end MATT
}

// Frees the Y/U/V buffers of the first 'count' frames and clears the pointers,
// so that neither sdl_close() nor a retried setup can ever free them a second time.
static void sdl_release_frame_buffers (sdl_instance_t * instance, int count)
{
	int i;

	for (i = 0; i < count; i++)
	{
		struct overlay_replacement *planes = &instance->frame[i].overlay_alt;

		free(planes->Y);	// free(NULL) is a no-op, so partially allocated frames are fine
		free(planes->U);
		free(planes->V);
		planes->Y = NULL;
		planes->U = NULL;
		planes->V = NULL;
	}
}

// Creates the single hardware YUV overlay (s_hw_overlay) for instance->surface and
// allocates the three plain-memory frames the decoder writes into.
//
// width, height : size of the video in pixels; both must be positive.
// Returns 0 on success and 1 on failure. On failure nothing stays allocated:
// every buffer obtained so far is freed, the buffer pointers are reset to NULL
// and the overlay is released again.
static int sdl_alloc_frames (sdl_instance_t * instance, int width, int height)
{
	size_t luma_size;	// bytes in the Y plane
	size_t chroma_size;	// bytes in each of the U and V planes
	int i;

	// reject sizes that would make the buffer size computation meaningless
	if (width <= 0 || height <= 0)
	{
		fprintf(stderr, "invalid video size %dx%d\n", width, height);
		return 1;
	}

	// do the multiplication in size_t so that it cannot overflow an int
	luma_size = (size_t) width * (size_t) height;
	chroma_size = luma_size / 4;

	// start MATT
	s_hw_overlay = SDL_CreateYUVOverlay (width, height, SDL_YV12_OVERLAY, instance->surface);
	// In windows, there can only be one hardware overlay and some cards (ATI) freak out if any other overlays are declared at all
	// For some reason, this is NOT documented well in SDL at all.

	// if hw overlay wasn't created properly, abort
	if (!s_hw_overlay)
	{
		fprintf(stderr, "could not create hardware overlay\n");
		return 1;
	}

	// leave a note on disk saying whether the overlay is hardware accelerated
	{
		FILE *F = fopen("hwaccel.txt", "wt");

		// if we were successfully able to create the file ..
		if (F)
		{
			// hw_overlay is a bit-field, so convert it explicitly to match %u
			fprintf(F, "HW Accel is set to %u\n", (unsigned int) s_hw_overlay->hw_overlay);
			fclose(F);
		}
	}

	// end MATT

	// allocate 3 YUV buffers
	for (i = 0; i < 3; i++)
	{
		struct overlay_replacement *planes = &instance->frame[i].overlay_alt;

		planes->Y = malloc(luma_size);
		planes->U = malloc(chroma_size);
		planes->V = malloc(chroma_size);

		if (!planes->Y || !planes->U || !planes->V)
		{
			// undo everything, including the frames completed in earlier iterations
			sdl_release_frame_buffers(instance, i + 1);
			SDL_FreeYUVOverlay(s_hw_overlay);
			s_hw_overlay = NULL;
			return 1;
		}

		instance->frame_ptr[i] = (vo_frame_t *) (instance->frame + i);

		// Matt's replacement for using an SDL Overlay: the decoder writes into our own buffers
		instance->frame[i].vo.base[0] = planes->Y;
		instance->frame[i].vo.base[1] = planes->U;
		instance->frame[i].vo.base[2] = planes->V;

		instance->frame[i].vo.copy = NULL;
		instance->frame[i].vo.field = NULL;
		instance->frame[i].vo.draw = sdl_draw_frame;
		instance->frame[i].vo.instance = (vo_instance_t *) instance;
	} // end for

	return 0;
}

// close callback of the driver (installed by vo_sdl_open).
// Releases the hardware overlay if one exists, frees the Y/U/V buffers of all
// three frames, resets every released pointer to NULL and clears
// s_overlay_allocated so that the next sdl_setup() allocates again.
static void sdl_close (vo_instance_t * _instance)
{
    sdl_instance_t * const instance = (sdl_instance_t *) _instance;
    int n;

    // the overlay only exists if a previous setup created it
    if (s_hw_overlay != NULL)
    {
        SDL_FreeYUVOverlay(s_hw_overlay);
        s_hw_overlay = NULL;
    }

    // give the frame memory back; free() ignores pointers that are already NULL
    for (n = 0; n < 3; n++)
    {
        struct overlay_replacement * const planes = &instance->frame[n].overlay_alt;

        free(planes->Y);
        planes->Y = NULL;

        free(planes->U);
        planes->U = NULL;

        free(planes->V);
        planes->V = NULL;
    }

    s_overlay_allocated = 0;
}

// setup callback of the driver (installed by vo_sdl_open).
// Allocates the overlay and the frame buffers for a width x height video the
// first time it is called; later calls are no-ops until sdl_close() clears
// s_overlay_allocated. Returns 0 on success, 1 if the allocation failed.
//
// MATT: the surface is not created here since we want it created before
// VLDP is called.
static int sdl_setup (vo_instance_t * _instance, int width, int height)
{
    // everything was already allocated by an earlier call, nothing to do
    if (s_overlay_allocated)
    {
        return 0;
    }

    if (sdl_alloc_frames ((sdl_instance_t *) _instance, width, height) != 0)
    {
        fprintf (stderr, "sdl could not allocate frame buffers\n");
        return 1;
    }

    s_overlay_allocated = 1;
    return 0;
}


// Prepares the (single, global) SDL video output instance for drawing on 'surface'.
//
// surface : an already created SDL surface; the overlay is later created for it
//           and displayed into its clip rectangle.
// Returns the instance as a vo_instance_t *, or NULL if no usable surface was
// passed in (previously a NULL surface was dereferenced).
// The instance lives in g_sdl_instance, so it must not be freed by the caller and
// every call returns the same object.
vo_instance_t * vo_sdl_open (SDL_Surface *surface)
{
    sdl_instance_t * instance = &g_sdl_instance;

    // surface and its pixel format are both read below, so neither may be missing
    if (surface == NULL || surface->format == NULL)
    {
        fprintf (stderr, "vo_sdl_open: no usable SDL surface\n");
        return NULL;
    }

    instance->vo.setup = sdl_setup;
    instance->vo.close = sdl_close;
    instance->vo.get_frame = sdl_get_frame;

    // remember the surface and a few of its properties
    instance->sdlflags = surface->flags;
    instance->bpp = surface->format->BitsPerPixel;
    instance->surface = surface;

    return (vo_instance_t *) instance;
}
