#!/usr/bin/perl -w
#
# WWW subroutines used at the Human Ageing Genomics Resources (http://genomics.senescence.info)

# I-CpGI or Identifier of CpG Islands

sub i_cpgi {
	my($cg, $length, $ratio, $dna) = @_;
	my $dna_length = length($dna);
	die "Invalid sequence. Please verify your query sequence.\n" unless $dna_length > $length;
	undef my %islands;
	my $cpg_end = 0;
	my $cpg_start = 0;

	# following the method by Takai and Jones, what follows are steps A to D
	while ($cpg_start != -1) {
		my $cursor = 0;
		$cursor = $cpg_end + 1 unless ($cpg_end == 0);
		$cpg_start = -1;

		my $temp = 0;
		# we start checking in the 3' direction
		while (($dna_length > $cursor + $length) && ($temp != -1)) {
			my $seq = substr($dna, $cursor, $length);
			my $c_content = ($seq =~ s/c/c/gi)/$length;
			my $g_content = ($seq =~ s/g/g/gi)/$length;
			if ((($c_content + $g_content) > $cg) && (find_cpg($c_content, $g_content, $ratio, $seq) == 1)) {
				$cpg_start = $cursor unless ($cpg_start != -1);
				$cpg_end = $cursor + $length;
				$cursor += $length;
			} else {
				++$cursor;
				$temp = -1 unless ($cpg_start == -1);
			}
		}

		# and then step E, seeking in the 5' direction to find the end of the island
		$cursor = $cursor + $length;
		$cursor = $dna_length unless ($cursor < $dna_length);

		while ($cpg_end <= $cursor && $cpg_start != -1) {
			my $seq = substr($dna, $cursor-$length, $length);
			my $c_content = ($seq =~ s/c/c/gi)/$length;
			my $g_content = ($seq =~ s/g/g/gi)/$length;
			if ((($c_content + $g_content) > $cg) && find_cpg($c_content, $g_content, $ratio, $seq)) {
				$cpg_end = $cursor;
				$cursor -= $length;
			}
			$cursor--;
		}

		# now for steps G and H
		my $flag = 0;
		while ($flag == 0 && $cpg_start != -1) {
			my $seq = substr($dna, $cpg_start, $cpg_end - $cpg_start);
			my $c_content = ($seq =~ s/c/c/gi)/length($seq);
			my $g_content = ($seq =~ s/g/g/gi)/length($seq);
			if ((($c_content + $g_content) > $cg) && find_cpg($c_content, $g_content, $ratio, $seq)) {
				$flag = 1;
			} else {
				$cpg_start++;
				$cpg_end--;
			}
		}

		$islands{"$cpg_start"} = "$cpg_end" unless $cpg_start == -1;
	}

	# now for step I
	my $past_end = 0;
	my $past_start = 0;
	foreach (sort by_number keys %islands) {
		if ($past_end + $length/2 > $_ && $past_end != 0) {
			$islands{$past_start} = $islands{$_};
			$past_end = $islands{$_};
			delete $islands{$_};
		} else {
			$past_end = $islands{$_};
			$past_start = $_;
		}
	}

	# graphical display
	print "I-CpGI\n\n";
	printf "%g%72g\n", 0, $dna_length;
	print "|----------------------------------------------------------------------|\n";
	foreach my $cp_start (sort by_number keys %islands) {
		my $start = int($cp_start/$dna_length*70);
		my $end = int($islands{$cp_start}/$dna_length*70);
		for ($start..$end) {
			printf "%s", "^";
		}
		print "\n";
		printf "%".$start."s%".$end."s\n", $cp_start, $islands{$cp_start};
	}
	print "\n".scalar(keys %islands)." CpG island(s) found\n";
}

sub find_cpg {
	my($c_ratio, $g_ratio, $ratio, $query_seq) = @_;
	my $observed = ($query_seq =~ s/cg/cg/gi);
	my $expected = $c_ratio*$g_ratio*length($query_seq);
	return 0 unless ($c_ratio > 0 && $g_ratio > 0);
	if ($observed/$expected >= $ratio) {
		return 1;
	} else {
		return 0;
	}
}

# Interaction Graphic Display
#
# subroutine to draw protein-protein interactions

sub draw_interactions {
	my($core, $inter, $names, $type) = @_;
	my $fix = 0;
	$fix = 1 if ($core != 0);
	undef my %coordx;
	undef my %coordy;
	my $radius = 50+(scalar(keys %$names)-$fix)*10;
	my $im = new GD::Image(3*$radius,3*$radius);
	my $brush = new GD::Image(2,2);
	my $ct_x = 3*$radius/2;
	my $ct_y = 3*$radius/2;
	my $white = $brush->colorAllocate(255,255,255);
	my $blue = $brush->colorAllocate(0,0,255);
	my $red = $brush->colorAllocate(255,0,0);
	my $green = $brush->colorAllocate(0,255,0);
	my $black = $brush->colorAllocate(0,0,0);
	$brush->transparent($white);
	$brush->fill(1,1,$blue);
	$im->setBrush($brush);
	$im->transparent($white);

	if ($fix == 1) {
		my $colour = $black;
		$colour = $red if ($$type{"$core"} eq "human");
		$colour = $green if ($$type{"$core"} eq "model");
		$colour = $blue if ($$type{"$core"} eq "mammal");
		if ($$type{"$core"} eq "downstream") {
			$im->string(gdSmallFont,$ct_x-length($$names{$core})*2.75,$ct_y-8,$$names{$core},$colour);
		} else {
			$im->string(gdMediumBoldFont,$ct_x-length($$names{$core})*3.5,$ct_y-8,$$names{$core},$colour);
		}
		$cordx{$core} = $ct_x;
		$cordy{$core} = $ct_y;
		$im->arc($ct_x,$ct_y,50,50,0,360,$black);
	}

	my $divider = 6.2832/(scalar(keys %$names)-$fix);
	my $counter = 0;
	foreach (keys %$names) {
		next if ($fix == 1 && $_ == $core);
		++$counter;
		my $x = $ct_x+int(cos($divider*$counter)*$radius)+length($$names{$_})*3.5;
		my $y = $ct_y+int(sin($divider*$counter)*$radius)+8;
		my $colour = $black;
		$colour = $red if ($$type{"$_"} eq "human");
		$colour = $green if ($$type{"$_"} eq "model");
		$colour = $blue if ($$type{"$_"} eq "mammal");
		if ($$type{"$_"} eq "downstream") {
			$im->string(gdSmallFont,$x-length($$names{$_})*2.75,$y-8,$$names{$_},$colour);
		} else {
			$im->string(gdMediumBoldFont,$x-length($$names{$_})*3.5,$y-8,$$names{$_},$colour);
		}
		$cordx{$_} = $x;
		$cordy{$_} = $y;
		$im->arc($x,$y,50,50,0,360,$black);
	}
	foreach (keys %$inter) {
		my @temp = split/\+/, $$inter{$_};
		my $xa = $cordx{$temp[0]};
		my $ya = $cordy{$temp[0]};
		my $xb = $cordx{$temp[1]};
		my $yb = $cordy{$temp[1]};
		my $distance = sqrt(($xa-$xb)*($xa-$xb)+($ya-$yb)*($ya-$yb));
		if ($fix == 1) {
			$im->line($xa+int(($xb-$xa)/$distance*25),$ya+int(($yb-$ya)/$distance*25),$xb+int(($xa-$xb)/$distance*25),$yb+int(($ya-$yb)/$distance*25),gdBrushed);
		} else {
			$im->line($xa+int(($xb-$xa)/$distance*25),$ya+int(($yb-$ya)/$distance*25),$xb+int(($xa-$xb)/$distance*25),$yb+int(($ya-$yb)/$distance*25),$black);
		}
	}
	print "Content-type:image/png\n\n";
	binmode(STDOUT);
	print $im->png;
}

# Phylogenetic Tree Plotter

sub draw_phylum {
	my ($divergence, $draw_common, $draw_names, $tree_type, @phylogeny) = @_;
	my $count = 0;
	my (%done, %past, %present);
	my $previous_div = 0;
	foreach ((keys %{$phylogeny[5]})) {
		$done{$_} = $phylogeny[5]->{$_}." ".$phylogeny[6]->{$_};
	}
	my $im = new GD::Image(800+$draw_common*100,100+60*$divergence);
	my $white = $im->colorAllocate(255,255,255);
	my $black = $im->colorAllocate(0,0,0);
	$im->transparent($white);
	for $present (@phylogeny) {
		++$count;
		my %hagrid = reverse %$present;
		my @names = sort sort_alpha keys %hagrid;

		if (scalar(@names) > $previous_div && $previous_div > 0) {
			for (1..scalar(@names)) {
				my $num_nam = $_-1;
				undef my @old;
				undef my @new;
				foreach my $key ( keys %$present) {
					if ($$present{$key} eq $names[$num_nam]) {
						push @new, $key;
					}
				}
				my $backref = $past{"$new[0]"};
				foreach my $key ( keys %$present) {
					if ($past{$key} eq $backref) {
						push @old, $key;
   					}
				}
				if ((@old) != (@new) && defined($new[0])) {
					undef my %branch;
					foreach (@old) {
						$branch{"$$present{$_}"} += 1;
					}
					my $counter = 0;
					my $div_conv = 0;
					my $fix_count = 1;
					foreach (keys %branch) {
						++$counter;
	       					my $branching = $branch{$_}-1;
 						$div_conv = $div_conv+$counter*$fix_count;
						$fix_count = -$fix_count;
						foreach my $key (@old) {
							if ($$present{$key} eq $_) {
								$past{$key} += $div_conv-$branching*$fix_count;
								$im->line((($count-1)*100),(40*$divergence+($past{$key}-$div_conv+$branching*$fix_count)*20),(($count-1)*100),(40*$divergence+$past{$key}*20),$black);
								if ($branching == 0 && $tree_type == 1) {
									$im->line((($count-1)*100),(40*$divergence+$past{$key}*20),(($count)*100),(40*$divergence+$past{$key}*20),$black);
					       			       	$im->string(gdSmallFont,($count*100+10),(40*$divergence+$past{$key}*20-5),$done{$key},$black);
									$done{$key} = -1;
								}
								delete $$present{$key};
   							}
						}
				   	}

					my $hagrid_key = $hagrid{$names[$num_nam]};
					if ($done{$hagrid_key} != -1 && $names[$num_nam] =~ /^[A-Z]/) {
						$im->line((($count-1)*100),(40*$divergence+$past{$hagrid_key}*20),(($count)*100),(40*$divergence+$past{$hagrid_key}*20),$black);
						$im->string(gdSmallFont,(($count-1)*100+3),(40*$divergence+$past{$hagrid_key}*20),$names[$num_nam],$black) unless ($draw_names == 0);
					}
				} else {
					my $hagrid_key = $hagrid{$names[$num_nam]};
					if ($done{$hagrid_key} != -1 && $names[$num_nam] =~ /^[A-Z]/) {
						$im->line((($count-1)*100),(40*$divergence+$past{$hagrid_key}*20),(($count)*100),(40*$divergence+$past{$hagrid_key}*20),$black);
						$im->string(gdSmallFont,(($count-1)*100+3),(40*$divergence+$past{$hagrid_key}*20),$names[$num_nam],$black) unless ($draw_names == 0);
					}
				}
			}
		} else {
			if ($draw_common == 1) {
				for (1..scalar(@names)) {
					my $hagrid_key = $hagrid{$names[$_-1]};
					$im->line((($count-1)*100),(40*$divergence+$past{$hagrid_key}*20),(($count)*100),(40*$divergence+$past{$hagrid_key}*20),$black) unless ($done{$hagrid_key} == -1);
					$im->string(gdSmallFont,(($count-1)*100+3),(40*$divergence+$past{$hagrid_key}*20),$names[$_-1],$black) unless ($draw_names == 0 || $done{$hagrid_key} == -1);
				}
			} else {
				--$count;
			}
		}
		$previous_div = scalar(@names);
	}
	if ($tree_type != 1) {
		foreach (keys %past) {
			$im->line((($count-1)*100),(40*$divergence+$past{$_}*20),(($count)*100),(40*$divergence+$past{$_}*20),$black);
			$im->string(gdSmallFont,($count*100+10),(40*$divergence+$past{$_}*20-5),$done{$_},$black);
		}
	}
	print "Content-type:image/png\n\n";
	binmode(STDOUT);
	print $im->png;
}

sub sort_alpha { "\L$a" cmp "\L$b" }

sub by_number { $a <=> $b }

# my death subroutine, in case something goes wrong
sub death {
	print "Content-type:text/plain\n\n";
	print "Error in script!\nPlease make sure you have entered your query correctly.\nIf the error persists, please contact the system administrator at feedback\@genomics.senescence.info\n";
	exit;
}

1;
