---
title: "Cell tracking analysis"
author: "Erin Boyle Anderson"
date: '`r format(Sys.Date(), "%B %d, %Y")`'
output: 
  html_document:
    toc: true
    number_sections: true
  
---

```{r setup,echo=FALSE,include=FALSE}
knitr::opts_chunk$set(echo = FALSE, message=FALSE, warning = FALSE)

library(tidyverse)
library(reshape2)
library(gridExtra) #this helps make my graphs organized pretty
library(CellTrackingEBA)
library(ggpubr)
library(circular) # calculating circular stats
# if I want to pull it from Github instead
#library(devtools)
#install_github("erinboyleanderson/CellTrackingEBA")


# for linear regression to work in geom with facet grid
library(devtools)
source_gist("524eade46135f6348140")

# set themes for graphs to use throughout-----

colorscale<-c("black","red","blue","purple") #for graphs for sorting wt, tbx5a and tbx5b via ggplot2
theme_set(theme_bw())


# for presentations -- uncomment when I am making those plots for presentations.
# theme_black<-dget("theme_black.R")
# theme_set(theme_black())
#colorscale<-c("white","red","blue","purple") # for themedark

# theme for tracks
theme_track<-list(theme(panel.grid=element_blank()),# get rid of grid lines
                  xlab("AP"), #label axes appropriately
                  ylab("ML"),
                  scale_color_gradientn(colors=rainbow(5))) #colorscale

# for statistics over time of 40 intervals
theme_stat<-list(scale_x_continuous(breaks=c(0,20,40),
                     limits=c(0,40),
                     labels=c("18 hpf","21 hpf","23 hpf")),
                 scale_color_manual(values=colorscale))

#for statistics over 60 intervals
theme_stat.60<-list(scale_x_continuous(breaks=c(0,20,40,60),
                     limits=c(0,60),
                     labels=c("18 hpf","21 hpf","23 hpf","25 hpf")),
                 scale_color_manual(values=colorscale))
```
# General questions and comments


# Data Import and tidying up

```{r wt and 5a data}
#need to also read these into R

# is this in pixels or in um? I need to double check this

wt<-data.frame()
for(n in 1:8){ #reads in what I am assuming is Qiyan's finalized matrix with corrected angle and drift subtracted out
  infile<-paste(paste("01 WT/06b Matrices_cmpt with overlap/Matrix_cmptwo_0",n,sep=""),
                "txt",sep=".")
  a<-read.table(infile)
  a$Embryo<-n
  wt<-rbind(wt,a)
  rm(a,infile)
}

wt<-select(wt,
           Embryo, Track, Time, X, Y, Somite)%>% #select only the columns that I am interested in
  mutate(Y=(Y*-1)+300)%>% # need to flip Y axis because of the inverse imagej coordinate plane then retransolate it back to the top
  mutate(X=X/1.25,Y=Y/1.25) # convert pixels into um 

wt<-MLquartile(wt) #assign ML quartiles

Tbx5a<-data_frame()
for(n in 1:8){ #reads in what I am assuming is Qiyan's finalized matrix with corrected angle and drift subtracted out
  infile<-paste(paste("02 5aMO/06b Matrices_cmpt with overlap/Matrix_cmptwo_0",n,sep=""),
                "txt",sep=".")
  a<-read.table(infile)
  a$Embryo<-n
  Tbx5a<-rbind(Tbx5a,a)
  rm(a,infile)
}
Tbx5a<-select(Tbx5a,
           Embryo, Track, Time, X, Y, Somite)%>%
  mutate(Y=(Y*-1)+300)%>%
  mutate(X=X/1.25,Y=Y/1.25) # convert pixels into um

Tbx5a<-MLquartile(Tbx5a)
```

```{r import and align 5b data, include=FALSE}

#datasets-----
Tbx5b<-data.frame() #RAW cell tracking data
for(n in 1:8){ #when I get more datafiles added, expand n
  infile<-paste(paste("Tracking_Tbx5b",n,sep="_"),"csv",sep=".")
  a<-read.csv(infile)
  colnames(a)<-c("X.1","Track","Time","X","Y","Distance","Velocity","Pixel.Value") #bc sometimes the colnames change if I have to edit the file so let's just force them all to be the same to fix that error
  a$Embryo<-n
  Tbx5b<-rbind.data.frame(Tbx5b,a)
  rm(a)
} #this reads in all the cell tracking info into one dataframe, with a seperate column per embryo

Tbx5b<-select(Tbx5b,Embryo,Time,Track,X,Y) #select only columns that I care about

# convert manual tracking data in pixels to um
Tbx5b<-mutate(Tbx5b,X=X/1.2044,Y=Y/1.2044)

#drift
Tbx5b.drift<-data.frame()
for(n in 1:8){#read in drift
  infile<-paste(paste("drift_Tbx5b",n,sep="_"),"csv",sep=".")
  a<-read.csv(infile)
  a<-a[,1:5] # select just the first 5 columns bc they are not equal in each
  colnames(a)<-c("X.1","Track","Time","X","Y") 
  a$Embryo<-n
  Tbx5b.drift<-rbind.data.frame(Tbx5b.drift,a)
  rm(a)
}

#convert MT data in pixels to microns
Tbx5b.drift<-mutate(Tbx5b.drift,X=X/1.2044,Y=Y/1.2044)


# these measurements are all in um

side.5b<-read.csv("side.csv") #to make everything all the same side
somites.5b<-read.csv("somiteboundaries.csv") # this is a plot of the somite boundaries at the lateral extent--use for recentering the plot
slope.5b<-slope(somites.5b)

side.angle.test.5b<-read.csv("angle_Tbx5b.csv")


# alignment of data----
Td<-driftcorrect(Tbx5b.drift,Tbx5b,8) #drift
Tcorr<-CellTrackingEBA::rotate(Td,somites.5b,8,side.5b) #rotate, need to call specific package because something is masking rotate
somites.5b.r<-rotate.somites(somites.5b,8,side.5b) #rotate somites
Tbx5b<-label.somites(Tcorr,somites.5b.r,8)
Tbx5b<-MLquartile(Tbx5b)


# if I want to see what the calculated angles are
a<-slope(somites.5b)%>%
  mutate(angle=atan(slope)*180/pi)

#test points at T=1
p<-ggplot(filter(Tbx5b, Time==1, Somite>=1,Somite<=5),aes(X,Y))+
  geom_point()+facet_grid(~Embryo)


#plot somite boundaries to make sure they are in a straight line----
p<-ggplot(data=somites.5b.r, mapping=aes(x=Xr,
                              y=Yr))+
  geom_point(aes(color=boundary))+
  facet_wrap(~Embryo)+ggtitle("Somite boundaries Tbx5b MO")

p+geom_point(filter(Tbx5b,Time==1),mapping=aes(x=X,y=Y,group=Track)) # make sure tracks line up with the somite boundaries

p<-ggplot(somites.5b,mapping=aes(X,Y))+
  geom_point(aes(color=boundary))+
  facet_wrap(~Embryo)+
  geom_point(data=filter(Td,Time==1)) # could I have mixed up embryo 2 and embryo 5? Embryo 5 isn't even tracked on the correct side of the somites~ I'm a little skeptical of embryo 5 tracknig... but it looks correct on fiji---lines up with nuclei

#once I have all the pieces I want 
rm(Td, Tcorr)
```

```{r test plots 5b, include=FALSE}


#test to make sure things are roughly orientated correctly

p<-ggplot(data=wt,mapping=aes(x=X,
                              y=Y,
                              group=Track))+
  geom_line(aes(color=Somite))+
  facet_wrap(~Embryo)+
  theme_track+
  ggtitle("tracks for wt embryos") 
p

a<-filter(wt, Time==1, Embryo==1)
p<-ggplot(data=a,mapping=aes(x=X,
                              y=Y,
                              group=Track))+
  geom_point(aes(color=Somite,shape=as.factor(MLpos)))+
  facet_grid(~Embryo)+
  ggtitle("wt Embryos at time 1")+
  theme_track


p<-ggplot(data=Tbx5a,mapping=aes(x=X,
                              y=Y,
                              group=Track))+geom_line(aes(color=Somite))+facet_wrap(~Embryo)+
  ggtitle("Tracks for Tbx5a Embryos")+
  theme_track
p

b<-filter(Tbx5b,Time==1)

p<-ggplot(data=b, mapping=aes(x=X,
                              y=Y,
                              group=Track))+
  geom_point(aes(color=Somite,shape=as.factor(MLpos)))+
  facet_wrap(~Embryo)+ggtitle("Tracks Tbx5b at time 1")+
  theme_track
p


rm(a, side.5b, side.angle.test.5b, slope.5b, somites.5b, somites.5b.r, Tbx5b.drift)
```

```{r double data, include=FALSE}
#more detailed comments on code under Tbx5b import

#read in tracking DF
double<-data.frame()
for(n in 1:8){ #when I get more datafiles added, expand n
  infile<-paste(paste("Tracking_double",n,sep="_"),"csv",sep=".")
  a<-read.csv(infile)
  colnames(a)<-c("X.1","Track","Time","X","Y","Distance","Velocity","Pixel.Value") 
  a$Embryo<-n
  double<-rbind.data.frame(double,a)
  rm(a)
} 

double<-select(double,Embryo,Time,Track,X,Y)%>% #select only columns that I care about
  mutate(X=X/1.2044,Y=Y/1.2044) # convert pixels to um

#drift
double.drift<-data.frame()
for(n in 1:8){#read in drift
  infile<-paste(paste("drift_double",n,sep="_"),"csv",sep=".")
  a<-read.csv(infile)
  a<-a[,1:5]
  colnames(a)<-c("X.1","Track","Time","X","Y") 
  a$Embryo<-n
  if(n==1){ # for some reason time is doubled for Embryo 1
    a$Time<-a$Time/2
  }
  if(n==2){ # i think the problem was that I tracked on a two channel image w/o seperating the tracks
    a$Time<-(a$Time+1)/2
  }
  double.drift<-rbind.data.frame(double.drift,a)
  rm(a)
}
double.drift<-mutate(double.drift,X=X/1.2044,Y=Y/1.2044) #convert pixels to um


#somite boundaries
somites.double<-read.csv("somites_double.csv")
#make a side index
side.double<-data.frame(Embryo=c(1,2,3,4,5,6,7,8),
                        Side=c("Left","Right","Left","Right","Left","Right","Right","Left"),
                        multiplier=c(1,-1,-1,1,-1,-1,-1,-1)) # multiplier is complicated and I need a better explanation for it

#registration
double.drift<-driftcorrect(double.drift,double,8)
double.rotated<-CellTrackingEBA::rotate(double.drift,somites.double,8,side.double)
double.somites.r<-rotate.somites(somites.double,8,side.double)
double<-label.somites(double.rotated,double.somites.r,8)
double<-MLquartile(double)

#are all plots complete?
p<-ggplot(double, aes(Track, Time))+
  geom_point()+
  facet_wrap(~Embryo)
# Embryo 7, track 5 is incomplete, so remove it also embryo 6, track 41

double<-filter(double, !(Embryo==7&Track==5))%>%
  filter(!(Embryo==6&Track==41))

#at some point need to also label somites

p<-ggplot(filter(double, Time==1, Somite>=1, Somite<=5),aes(X,Y))+
  geom_point()+
  facet_wrap(~Embryo)

p2<-p+geom_point(data=double.somites.r, mapping=aes(Xr,Yr), color="blue")

#####test plots ########

p<-ggplot(double,aes(x=X,y=Y,group=Track))+
  geom_line(aes(color=Somite))+
  theme_track+
  ggtitle("Double tracks")+facet_wrap(~Embryo)

p2<-p+geom_point(data=double.somites.r, mapping=aes(X=Xr,Y=Yr,group=NULL))

# this is shifted way lower than in should be although it is nicely rotated --- not embryo 2

rm(double.drift,double.rotated) # clean up

#just a quick test to see how the tracks are being divided up by somites and ML quartile and if they should be further divided
a<-filter(double,Time==1)
a$MLpos<-as.factor(a$MLpos)
p<-ggplot(a, aes(X,Y))+geom_point(aes(shape=as.factor(MLpos)))+
  facet_wrap(~Embryo) #+
  #scale_color_gradientn(colors = rainbow(5))
#right now it looks like that one point at 150 is a far outlier wrt MLpos in e1
                                                         

p2<-p+geom_point(data=double.somites.r,aes(Xr,Yr,color=boundary))
rm(a)    
rm(double.somites.r, side.double, somites.double)
```

```{r combined datasets and subsets}


assign.geno.all<-function(DF.wt,DF.5a,DF.5b,DF.doub){
  a<-DF.wt
  b<-DF.5a
  c<-DF.5b
  d<-DF.doub
  a$genotype<-"wt"
  b$genotype<-"Tbx5a"
  c$genotype<-"Tbx5b"
  d$genotype<-"double"
  e<-rbind.data.frame(a,b,c,d)
  e$genotype<-factor(e$genotype, c("wt","Tbx5a","Tbx5b","double")) # reorder genotype in the order I would like it to be in
  return(e)
}

ALL<-assign.geno.all(wt, Tbx5a,Tbx5b, double)
ALL1.5<-filter(ALL,Somite>=1)%>%
  filter(Somite<6)

#subset to cells in somites 1-5 only and only 1st 40 time points
Tbx5b.40<-filter(Tbx5b, Time<=40) # to match up with the 1:40 time points used in previous experiments
Tbx5b.40.s1.s5<-filter(Tbx5b.40,Somite>=1)%>%
  filter(Somite<=5) #this matches up with X extent of wt,5a
double.40.s1.s5<-filter(double, Time<=40, Somite>=1)%>%filter(Somite<=5)


#subset to cells in Limb field only
LF.wt<-filter(wt,Somite<5)
LF.5a<-filter(Tbx5a, Somite<5)
LF.5b<-filter(Tbx5b.40.s1.s5,Somite<5)
LF.doub<-filter(double.40.s1.s5,Somite<5)

LF.all<-assign.geno.all(LF.wt,LF.5a,LF.5b,LF.doub)


```

Qiyan's data: complete sets of Tbx5a and wt tracking

Wildtype embryos

* `r max(wt$Embryo)` number of embryos
* tracked for `r max(wt$Time)` intervals of 8 minutes starting at 18 somites


Tbx5a embryos

Wildtype embryos

* `r max(Tbx5a$Embryo)` number of embryos
* tracked for `r max(Tbx5a$Time)` intervals of 8 minutes starting at 18 somites

  
My data

Tbx5b morphants

* `r max(Tbx5b$Embryo)` embryos
* tracked for 60 frames at intervals of 8 minutes starting at 18 somites
  * note: restricted most analyses to only the 1st 40 time points
  

Double morphants

* `r max(double$Embryo)` embryos
* tracked for 40 frames at 8 minute intervals starting at 18 somites


General notes:

* unless otherwise stated, all distance measurements are in um and time measurements is in minutes.
* also should try to use Qiyan's digital flat mounting--I haven't yet done that, although she found that it did not make a significant difference


```{r tracks in range}
# test to make sure enough tracks per embryos

a<-tracks.in.range(wt)%>%select(Embryo,"track #")%>%rename("Wt tracks"="track #")
b<-tracks.in.range(Tbx5a)%>%rename("Tbx5a tracks"="track #")
c<-tracks.in.range(Tbx5b.40.s1.s5)%>%rename("Tbx5b tracks"="track #")
d<-tracks.in.range(double.40.s1.s5)%>%rename("double tracks"="track #")

left_join(left_join(left_join(a,b,by="Embryo"),c,by="Embryo"),d,by="Embryo")

rm(a,b,c,d)

#this will show how evenly distributed everything is
test<-ggplot(ALL1.5,aes(Somite,MLpos))+geom_point()+facet_grid(genotype~Embryo)
```

```{r all-test-plots, include=FALSE}
p<-ggplot(filter(ALL1.5, Time==1),aes(Y))+
  geom_density()+
  facet_grid(genotype~Embryo)


```

##Basic overview plots


```{r plots}
p.alltracks<-ggplot(data=ALL1.5,mapping = aes(x=X,
                                 y=Y,
                                 group=Track,
                                 color=Somite))+
  geom_line(aes(color=Somite))+
  theme_track+
  facet_grid(genotype~Embryo)+ ggtitle("All tracks")

p.alltracks

p<-ggplot(filter(ALL1.5, Time==1),mapping = aes(x=X,
                                 y=Y,
                                 group=Track,
                                 color=Somite))+
  geom_point(aes(shape=as.factor(MLpos)))+
  theme_track+
  facet_grid(genotype~Embryo, scales = "free_y")+ ggtitle("Division of cells by AP and ML positions")
p

# they are aligned really well AP, but not aligned as well along the ML axis if we look only at the positions that we are interested in

p<-ggplot(ALL1.5,aes(X,Y,group=interaction(Embryo,Track)))+
  geom_line(aes(color=Somite))+
  theme_track+
  facet_grid(~genotype)+
  ggtitle("All tracks combined")


```


# Statistics and Analysis


## Corresponding AP values

Does starting position predict final position? This shows the correlations between starting AP and ML position and final AP and ML position, for all comparisons.

```{r AP start vs finish}

a<-filter(ALL1.5,Time==1)%>%
  rename(startX=X,startY=Y)%>%
  select(Embryo,Track, genotype, startX,startY)
b<-filter(ALL1.5,Time==40)
c<-left_join(b,a,by=c("Embryo","Track","genotype")) #this adds the starting value on to everything to index for graphing


p.AP.AP<-ggplot(c,aes(startX,X))+
  geom_point(aes(color=Somite))+
  scale_color_gradientn(colors=rainbow(7))+
  facet_wrap(~genotype,nrow=1)+
  stat_smooth_func(geom="text",method="lm",hjust=0,parse=TRUE, 
                   size=1)+ #add LR
  geom_smooth(method="lm", color="black")+
  xlab("Starting AP")+ylab("Final AP")+
  ggtitle("Final AP position by starting AP position")

p.ML.AP<-ggplot(c,aes(startY,X))+
  geom_point(aes(color=MLpos))+
  scale_color_gradientn(colors=rainbow(7))+
  facet_wrap(~genotype,nrow=1)+
   stat_smooth_func(geom="text",method="lm",hjust=0,parse=TRUE,
                    size=1)+ #add LR
  geom_smooth(method="lm", color="black")+
  xlab("Starting ML")+ylab("Final AP")+
  ggtitle("Final AP position by starting ML position")

p.AP.ML<-ggplot(c,aes(startX,Y))+
  geom_point(aes(color=Somite))+
  scale_color_gradientn(colors=rainbow(7))+
  facet_wrap(~genotype,nrow=1)+
  stat_smooth_func(geom="text",method="lm",hjust=0,parse=TRUE,
                   size=1)+ #add LR
  geom_smooth(method="lm", color="black")+
  xlab("Starting AP")+ylab("Final ML")+
  ggtitle("Final ML position by starting AP position")

p.ML.ML<-ggplot(c,aes(startY,Y))+
  geom_point(aes(color=MLpos))+
  scale_color_gradientn(colors=rainbow(7))+
  facet_wrap(~genotype,nrow=1)+
   stat_smooth_func(geom="text",method="lm",hjust=0,parse=TRUE,
                    size=1)+ #add LR
  geom_smooth(method="lm", color="black")+
  xlab("Starting ML")+ylab("Final ML")+
  ggtitle("Final ML position by starting ML position")

grid.arrange(p.AP.AP,p.AP.ML,p.ML.ML,p.ML.AP)


```

Starting AP position can predict final AP position and starting ML position can predict final AP position, but starting ML position does not predict final AP position and starting AP position cannot predict final ML position.


## Measures of convergenence

These plots will show movement along the AP and ML axis with regards to time. Each colored line is the average of all the cells in that group (either somite group or ML position) per embryo, while the black line is the total average for that position. Unless otherwise stated, the center of mass is subtracted from the positioning, which allows us to look at how these cells are moving in regard to each other, while ignoring the movement of the limb field as a whole.

```{r spaghetti plots, fig.height=2}

#### correct for geometric center ####
geometric.center<-ALL1.5%>%
  group_by(Embryo,genotype, Time)%>%
  summarise(centerX=mean(X),centerY=mean(Y))


mean.pos.som<-ALL1.5%>% #calculate the mean X position at each somite per embryo at each time point
  unite(E.S,c(Embryo,Somite),sep=".",remove=FALSE)%>% #need this so we have a unique signifier for plotting
  
  group_by(Embryo, genotype, Somite, Time,E.S)%>%
  summarise(avgXs=mean(X),avgYs=mean(Y))%>%
  left_join(geometric.center,by=c("Embryo","genotype","Time"))%>% #add in geometric center points
  mutate(finalX=avgXs-centerX,finalY=avgYs-centerY)%>% #subtract out geometric center
  ungroup

mean.pos.ML<-ALL1.5%>%
  unite(E.M,c(Embryo,MLpos),sep=".",remove=FALSE)%>%
  group_by(Embryo,genotype, MLpos, Time, E.M)%>%
  summarize(avgXs=mean(X),avgYs=mean(Y))%>%
  left_join(geometric.center,by=c("Embryo","genotype","Time"))%>%
  mutate(finalX=avgXs-centerX,finalY=avgYs-centerY)%>% 
  ungroup
  

mean.mean<-mean.pos.som%>% #find mean of somite position for all embryos
  group_by(genotype, Somite, Time)%>%
  summarise(finalX=mean(finalX),finalY=mean(finalY))%>%
  ungroup
  

##### plots ####

p<-ggplot(mean.pos.som, aes(y=finalX,x=-Time,group=E.S))+
  coord_flip()+
  geom_line(aes(color=Somite))+
  scale_color_gradientn(colors=rainbow(5))+
  facet_grid(~genotype)+
  xlab("Time")+ylab("AP (um)")+
  scale_x_continuous(breaks=c(-40,-20,0),
                     limits=c(-40,0),
                     labels=c("23hpf","21 hpf","18hpf"))+
  ggtitle("AP trajectories over time")+
  geom_line(data=mean.mean, mapping=aes(group=Somite), size=1.1)

p
f1<-p
p.spag.AP<-p


#p1+geom_line(data=mean.mean, mapping=aes(finalY,-Time, group=Somite))+
#  labs(title="Grouped ML displacement",subtitle="colored lines rep mean tracks per somite per embryo, black lines are mean track per embryo")


##### plots for ML ######
mean.mean.ML<-mean.pos.ML%>% #find mean of somite position for all embryos
  group_by(genotype, MLpos, Time)%>%
  summarise(finalX=mean(finalX),finalY=mean(finalY))%>%
  ungroup


p1<-ggplot(mean.pos.ML,aes(y=finalY,x=Time,group=E.M))+
  geom_line(aes(color=MLpos))+
  scale_color_gradientn(colors=rainbow(5))+
  facet_grid(~genotype)+
  geom_line(data=mean.mean.ML, mapping=aes(group=MLpos),size=1.1)+
  scale_x_continuous(breaks=c(0,20,40),
                     limits=c(0,40),
                     labels=c("18hpf","21 hpf","23hpf"))+
  theme(axis.text.y = element_blank(),axis.ticks.y = element_blank())+ # remove numbers and ticks because they aren'y super relevant? 
  xlab("Time")+ylab("ML position")+ # remember all coords are flipped
  ggtitle("ML trajectories calculated by ML position minus COM")


# how are the tracks

#ML not corrected for COM


p3<-ALL1.5%>%
  group_by(Embryo, Time, MLpos, genotype)%>%
  mutate(meanY=mean(Y))%>%
  ggplot(aes(Time, meanY, group=interaction(MLpos,Embryo)))+
  scale_color_gradientn(colors=rainbow(5))+
  geom_line(aes(color=MLpos))+
  facet_grid(~genotype)+
  geom_line(aes(group=MLpos),stat="summary",fun.y="mean", size=1)+
  scale_x_continuous(breaks=c(0,20,40),
                     limits=c(0,40),
                     labels=c("18hpf","21 hpf","23hpf"))+
  labs(title="ML trajectories over time",xlab="Time",ylab="ML position")

p3

p.spag.ML<-p3 # thesis fig
f2<-p3 # for figure making for publication
```

### Compaction factor

I want to also measure convergence by embryo. A value of 1 represents no change in convergence over time while a value less than 1 indicates that the embryo is converging. This statistic looks at the distance between the average outside groups in the limb field (either Somite 1 and 4 [AP] or MLpos 1 and 4) and compares it to the initial distance at 18hpf.

It is an embryo-level statistic.

```{r convergence, fig.height=2}

#calculate convergence for each limb field
con.wt<-convergence(LF.wt)
con.5a<-convergence(LF.5a)
con.5b<-convergence(LF.5b)
con.d<-convergence(LF.doub)

con.all<-assign.geno.all(con.wt,con.5a,con.5b,con.d)

#for plotting
con.summary<- plyr::ddply(con.all, c( "Time","genotype"), summarise,
               N    = length(convergenceX),
               mean = mean(convergenceX),
               sd   = sd(convergenceX),
               se   = sd / sqrt(N))
      

p<-ggplot(con.summary,aes(x=Time,y=mean))+
  geom_point(aes(color=genotype))+
  theme_stat+
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se,color=genotype), alpha=0.5)+
  ggtitle("AP convergence")+
  ylab("compaction")
f4<-p # figure making for paper
p.con.AP<-p # thesis fig

##to show just wt, 5a, 5b for figures in presentations

a<-filter(con.summary,genotype!="double")
p3<-ggplot(a,aes(x=Time,y=mean))+
  geom_point(aes(color=genotype))+
  theme_stat+
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se,color=genotype), alpha=0.5)+
  ggtitle("AP convergence")+
  ylab("Convergence")


#what does this look like if we let 5b go out to 60 points, does it continue to converge?
test<-filter(Tbx5b, Somite>=1)%>%filter(Somite<5)
con.test<- plyr::ddply(convergence(test),"Time", summarise,
               N    = length(convergenceX),
               mean = mean(convergenceX),
               sd   = sd(convergenceX),
               se   = sd / sqrt(N))
p60<-p+geom_point(data=con.test,color="blue")+
  geom_errorbar(data=con.test,aes(ymin=mean-se,ymax=mean+se),
                color="blue",alpha=0.5)+
  theme_stat.60

## to show everything but double for figures purposes
#p3+geom_point(data=con.test,color="blue")+theme_stat.60


```

```{r convergence ML, fig.height=2}
#let's bin convergence properly along the ML axis

conML.wt<-convergenceML(LF.wt)
conML.5a<-convergenceML(LF.5a)
conML.5b<-convergenceML(LF.5b)
conML.doub<-convergenceML(LF.doub) # double doesn't work right now... need more tracks

conML.all<-assign.geno.all(conML.wt,conML.5a,conML.5b,conML.doub)

#summarize for plotting
conML.summary<- plyr::ddply(conML.all, c( "Time","genotype"), summarise,
               N    = length(convergenceY),
               mean = mean(convergenceY),
               sd   = sd(convergenceY),
               se   = sd / sqrt(N))
#plots          

p<-ggplot(conML.summary,aes(x=Time,y=mean))+
  geom_point(aes(color=genotype))+
  theme_stat+
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se,color=genotype),alpha=0.5)+
  labs(title="ML Convergence", y="compaction")

f3<-p #figure making
p.con.ML<-p #thesis fig

# for plotting without double for presentations
pd<-ggplot(filter(conML.summary,genotype!="double"),aes(x=Time,y=mean))+
  geom_point(aes(color=genotype))+
  theme_stat+
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se,color=genotype),alpha=0.5)+
  labs(title="Convergence along the ML axis",subtitle="ML axis divided into quarters")

# plot ML and AP combined for report

ggarrange(p60,p, widths=c(2,1), common.legend=TRUE)


# stats for convergence -----

# ML
a<-filter(con.all, Time==40)%>%unique

aov.con<-aov(convergenceX~genotype, data=a)
summary(aov.con)
TukeyHSD(aov.con)


a<-filter(conML.all, Time==40)%>%unique

aov.conML<-aov(convergenceY~genotype, data=a)
summary(aov.conML)
TukeyHSD(aov.conML)

```

When we look at convergence with this statistic, it is pretty clear that Tbx5b deficient embryos are converging while Tbx5a deficient embryos (and doubles) are not. Also, if we extend the Tbx5b timepoints out the full tracked 60 points, they do not appear to be converging much more, so again it is not simply a delay.

This shows that if we look at ML convergence the same way we are looking at convergence along the AP axis (in this case, we divided the ML values of each limb field into quarters and calculated the distance between the mean of each for each embryo). We can also see here that when we remove the center of mass correction, both the wt and Tbx5a deficient embryos are moving medially along the ML axis as a group while the Tbx5b and double deficient embryos are remaining relatively stationary.

Total convergence values

```{r display conv values, echo=FALSE}
con.all%>%
  left_join(conML.all)%>%
  filter(Time==40)%>%
  group_by(genotype)%>%
  summarise(APcon=mean(convergenceX),
            MLcon=mean(convergenceY),
            seAP=sd(convergenceX)/sqrt(length(convergenceX)),
            seML=sd(convergenceY)/sqrt(length(convergenceY)))

```

### Scatter

Qiyan used scatter as a proxy for convergence.

* Scatter(x)=standard deviation(x at time i)/standard deviation (x at time 0)
* so this is the standard deviation of all X (75 tracks) at time 1

Generally, this is telling me how close (or far apart are the cells at any given point compared to when they started). If scatter INCREASES the cells are moving away from each other compared to the start, if it decreases, they are moving closer together

```{r scatter, echo=FALSE}


scatter.all<-scatterfun(ALL1.5)

p1<-ggplot(scatter.all,aes(Time, mscatterX,group=genotype))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=mscatterX-seX,ymax=mscatterX+seX,
                    color=genotype),
                alpha=0.2)+
  theme_stat+
  ggtitle("AP Scatter over time s1-5")


p2<-ggplot(data=scatter.all,
          mapping=aes(x=Time,
                      y=mscatterY))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=mscatterY-seY,ymax=mscatterY+seY, 
                    color=genotype), 
                alpha=0.2)+
  theme_stat+
  ggtitle("ML Scatter over time s1-5")


#what if we include all 60 time points tracked of 5b
scatter.5b.60<-scatterfun(filter(Tbx5b, Somite>=1)%>%
                            filter(Somite<5)%>%
                            mutate(genotype="Tbx5b"))
p3<-ggplot(data=scatter.5b.60,aes(x=Time,y=mscatterX))+
  geom_point()+
  theme_stat.60+
  labs(title="Scatter for Tbx5b embryos s-15",
       subtitle="Tbx5b AP scatter does not continue to decrease after 40 time points")
grid.arrange(p1,p2,p3, nrow=2)
```

Tbx5b embryos do converge, they just converge(along the AP axis) at a slower rate than wt embryos.  Interestingly, scatter by embryo is highly varied, in terms of absolute value, both in Tbx5b and wt. Is this the best measure? Make sure to include error bars.

Tbx5b scatter does not appear to continue to decrease after 40 time points (as can be seen when we graph scatter out to 60 time points). This suggests that the decreased scatter at time 40 is not simply due to a DELAY in convergence.


```{r scatter somites 1-4 only, fig.height =2}

#find scatter for these cells

as<-scatterfun(LF.all)

#plot
p1<-ggplot(as, aes(x=Time,y=mscatterX))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=mscatterX-seX, 
                    ymax=mscatterX+seX, 
                    color=genotype), 
                alpha=0.3)+
  theme_stat+
  ggtitle("AP Scatter SOMITES 1-4 ONLY over time")

p2<-ggplot(as, aes(x=Time,y=mscatterY))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=mscatterY-seY, 
                    ymax=mscatterY+seY, 
                    color=genotype), 
                alpha=0.3)+
  theme_stat+
  ggtitle("ML Scatter SOMITES 1-4 ONLY over time")

grid.arrange(p1,p2,nrow=1)


fit<-aov(scatterX~genotype, data=filter(as, Time==40))
summary(fit)
TukeyHSD(fit)

fit<-aov(scatterY~genotype, data=filter(as, Time==40))
summary(fit)
TukeyHSD(fit)
# no significant difference for ML scatter
```

This is interesting to me. When you take out the 5th somite cells (which aren't included in the limb bud), you can see these patterns clearer. 

Also, no significant difference for ML scatter

```{r scatter by somite level, fig.height =2}
#not sure what this says--currently not including in report


scatsom<-scatter.somite(LF.all, somite.final = 4)

p1<-ggplot(scatsom, aes(Time, mscatterX))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=mscatterX-seX, ymax=mscatterX+seX, color=genotype), alpha=0.5)+
  facet_grid(~Somite)+
  ggtitle("Scatter per somite level over time")+
  theme_stat

scatsom.40<-filter(scatsom, Time==40)%>%
  select(genotype, Somite, mscatterX, seX)%>%
  unique%>%
  ungroup

p2<-ggplot(scatsom.40, aes(Somite, mscatterX, group=genotype))+
  geom_line(aes(color=genotype))+
 # geom_errorbar(aes(ymin=mscatterX-seX, ymax=mscatterX-seX))+ # these aren't working and I don't know why
  scale_color_manual(values=colorscale)+
  ggtitle("Final scatter values across the AP axis")

ggarrange(p1,p2, widths=c(4,1),common.legend = TRUE)


# qiyan compared scatter of the fin field to scatter ot the peritneum

# as is scatter for fin field
sc5<-scatterfun(filter(ALL1.5, Somite==5))

p3<-ggplot(as, aes(x=Time,y=mscatterX, group=Embryo))+
  geom_line()+
  geom_ribbon(aes(ymin=mscatterX-seX, 
                    ymax=mscatterX+seX), 
                alpha=0.3)+
  geom_line(data=sc5, color="purple")+
 # geom_ribbon(data=sc5)+
  theme_stat+
  facet_grid(~genotype, scale="free_y")+
 # ylim(-1,5)+
  ggtitle("scatter in limb field versus scatter somites 5")

test<-filter(sc5, genotype=="Tbx5b", Time==40)

p3

```

What is ging on with somite 1? It seems like that is where all the action is. In all the other somites, the scatter segregates pretty nearly either with all morphants together or all. Also there is definitely weird stuff going on in the cells adjacent to somite 5 in Tbx5b deficient embryos, but that is because one embryo is a really large outlier and I haven't figured out how to deal with it.

## Displacement

This tells us displacement along the AP and ML axis. Displacement tells us how far the cell has moved relative to its initial starting point.

  
```{r displacement, echo=FALSE} 

sum.disp.XY<-function(df){
  b<-displacement.XY(df)
  a<- plyr::ddply(b, c( "Time"), summarise,
               N    = length(displacement),
               mean = mean(displacement),
               sd   = sd(displacement),
               se   = sd / sqrt(N))
}

#now calculate displacements for each dataset
disp.wt<-displacement.func(wt)
disp.5a<-displacement.func(Tbx5a)
disp.5b<-displacement.func(Tbx5b.40.s1.s5)
disp.doub<-displacement.func(double.40.s1.s5)

disp.total<-assign.geno.all(disp.wt,disp.5a,disp.5b, disp.doub)
rm(disp.wt,disp.5a,disp.5b,disp.doub) # clean up

t.disp.wt<-sum.disp.XY(wt)
t.disp.5a<-sum.disp.XY(Tbx5a)
t.disp.5b<-sum.disp.XY(Tbx5b.40.s1.s5)
t.disp.doub<-sum.disp.XY(double.40.s1.s5)

t.disp<-assign.geno.all(t.disp.wt,t.disp.5a,t.disp.5b,t.disp.doub)
rm(t.disp.wt,t.disp.5a,t.disp.5b,t.disp.doub) # clean up

disp.total.sum<-disp.total%>%
  select(Time,avgXdisp, avgYdisp, seX, seY, genotype)%>%
  unique
  

p1<-ggplot(data=disp.total.sum,mapping=aes(x=Time,
                                   y=avgXdisp,
                                   group=Time))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=avgXdisp-seX,ymax=avgXdisp+seX,color=genotype),alpha=0.5)+
  theme_stat+
  ylab("displacement")+
  ggtitle("AP displacement")

f5<-p1 # figure making for paper
p.disp.AP<-p1 # thesis fig


p2<-ggplot(data=disp.total.sum,mapping=aes(x=Time,
                                   y=avgYdisp,
                                   group=Time))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=avgYdisp-seY,ymax=avgYdisp+seY,color=genotype),alpha=0.5)+ #need to get error for just AP/ML
  theme_stat+
  ylab("displacement")+
  ggtitle("ML displacement")

f6<-p2 # figure making for paper
p.disp.ML<-p2

p3<-ggplot(data=t.disp,mapping=aes(x=Time,
                                   y=mean,
                                   group=genotype))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se,color=genotype), alpha=0.5)+
  theme_stat+
  ylab("Displacement (um)")+
  ggtitle("Total displacement")

grid.arrange(p1,p2,p3, nrow=2)


## for presentation purposes where I want to not talk about the double right away
a<-filter(disp.total.sum, genotype!="double")
p1a<-ggplot(data=a,mapping=aes(x=Time,
                                   y=avgXdisp,
                                   group=Time))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=avgXdisp-seX,ymax=avgXdisp+seX,color=genotype), alpha=0.5)+
  theme_stat+
  ylab("displacement")+
  ggtitle("AP displacement")


p2a<-ggplot(data=a,mapping=aes(x=Time,
                                   y=avgYdisp,
                                   group=Time))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=avgYdisp-seY,ymax=avgYdisp+seY,color=genotype),alpha=0.5)+
  theme_stat+
  ylab("displacement")+
  ggtitle("ML displacement")

t.disp%>%
  filter(Time==40)

disp.total%>%
  filter(Time==40)%>%
  select(genotype, avgXdisp, seX, avgYdisp, seY)%>%
  unique

```

Both Tbx5a mutant and Tbx5b mutants show similar levels of displacement along the AP (X) axis. However, the overall displacement along the medial-lateral axis remains fairly constant for Tbx5b mutants, compared to the relatively similar displacement in Tbx5a and wt embryos.

Of course, I need to double check this after I extract the Z values and then perform digital flat mounting to make sure the ML differences aren't caused exclusively by tilt/alignment issues.

In general the double seems to follow the displacement values for Tbx5b, looks like they are displaced a little more AP than the Tbx5a (map closer to Tbx5b) but have a negative displacement ML? What would that mean?

It looks like there may be a mild correlation between displacement and starting X position


* global displacement is just location at final time minus location at start time

```{r displacement vs position, echo=FALSE}

ci<-function(df){ # for finding 95% confidence intervals
  a<-1.96*sd(df)/(sqrt(length(df)))
}


#calculate average displacement along the X and Y axis --- in order to do this, I divided the X and Y axis into 20-25 bins and then found the mean displacement in that bin
a<-filter(disp.total,Time==1)%>%
  rename(startX=X,startY=Y)%>%
  select(Embryo,Track, genotype, startX,startY)
b<-filter(disp.total,Time==40)
c<-left_join(b,a,by=c("Embryo","Track","genotype"))%>% #this adds the starting value on to everything to index for graphing
#  filter(Somite<5)%>% # keep this comment for my paper figures, comment out otherwise
  select(-avgXdisp,-avgYdisp, -seX,-seY,-Time,-X,-Y)%>%# remove extra columns to make more readable
  group_by(genotype)%>% ### now we are calculating the average displacement based on starting values
  mutate(binX=ntile(startX,25))%>% # divide x axis into 25
  mutate(binY=ntile(startY,20))%>% # divide y axis into 20
  ungroup%>%
  group_by(genotype,binX)%>%
  mutate(XbinX=mean(displacementX), # AP grouped by AP
         ciXbX=ci(displacementX), #calculate 95% confidence interval
         YbinX=mean(displacementY),# ML grouped by AP
         ciYbX=ci(displacementY),
         avgstartX=mean(startX))%>% #to make it one point instead of step-like
  ungroup%>%
  group_by(genotype,binY)%>%
  mutate(XbinY=mean(displacementX), #AP grouped by ML
         YbinY=mean(displacementY),#ML grouped by ML
         ciXbY=ci(displacementX),
         ciYbY=ci(displacementY),
         avgstartY=mean(startY)) 

# c$Somite<-as.factor(c$Somite) # for figure
#### plots ####
colorsfig<-c("red","orange","green","blue")

p1<-ggplot(c, aes(startX,displacementX))+
  geom_ribbon(aes(avgstartX,ymin=XbinX-ciXbX,ymax=XbinX+ciXbX))+#,alpha=0.5)+ #95% CI
  geom_point(aes(color=Somite),alpha=0.1)+ # raw data
  scale_color_gradientn(colors = rainbow(5))+
 # scale_color_manual(values=colorsfig)+ # figure
  facet_grid(~genotype)+ 
  ggtitle("AP displacement compared to starting AP position")+
  geom_line(mapping=aes(avgstartX, XbinX),size=1)

p2<-ggplot(c, aes(startX,displacementY))+
  geom_ribbon(aes(avgstartX,ymin=YbinX-ciYbX,ymax=YbinX+ciYbX),alpha=0.5)+
  geom_point(aes(color=Somite), alpha=0.1)+
  scale_color_gradientn(colors = rainbow(5))+
  facet_grid(~genotype)+ 
  ggtitle("ML displacement compared to starting AP position")+
  geom_line(mapping=aes(avgstartX, YbinX),size=1)
  
p3<-ggplot(c, aes(startY,displacementX))+
  geom_ribbon(aes(avgstartY,ymin=XbinY-ciXbY,ymax=XbinY+ciXbY))+
  geom_point(aes(color=MLpos), alpha=0.1)+
  scale_color_gradientn(colors = rainbow(5))+
  facet_grid(~genotype)+ 
  ggtitle("AP displacement compared to starting ML position")+
  geom_line(mapping=aes(avgstartY, XbinY),color="black",size=1)

p4<-ggplot(c, aes(startY,displacementY))+
  geom_ribbon(aes(avgstartY,ymin=YbinY-ciYbY,ymax=YbinY+ciYbY),alpha=0.5)+
  geom_point(aes(color=MLpos),alpha=0.1)+
  scale_color_gradientn(colors = rainbow(5))+
  facet_grid(~genotype)+ 
  ggtitle("ML displacement compared to starting ML position")+
  geom_line(mapping=aes(avgstartY, YbinY),color="black", size=1)

 grid.arrange(p1,p2,p3,p4)
 
# thesis figs
p.disp.APbyAP<-p1
p.disp.MLbyAP<-p2
p.disp.APbyML<-p3  
p.disp.MLbyML<-p4 

c%>%
  group_by(genotype,Somite)%>%
  select(genotype, Somite,displacementX)%>%
  summarise(mX=mean(displacementX))%>%
  unique%>%
  spread(genotype, mX)%>%
  mutate(diffB=wt-Tbx5b)
# difference between displacement is greater for somites 1 and 2 than 3 and 4

```


<P style="page-break-before: always">
### Displacement plots including direction

```{r displacement directionality}

d1<-filter(ALL1.5,Time==1)
d2<-filter(ALL1.5,Time==40)

startstop<-rbind(d1,d2)

p<-ggplot(startstop,mapping=aes(x=X,y=Y,group=Track))+
  facet_grid(genotype~Embryo)+
  geom_line(aes(color=Somite))+
  theme_track+
  geom_point(data=d1, aes(X,Y), size=0.2)+ #plot with a dot at time 1
  ggtitle("Displacement at starting time versus final time")


# can I use arrows?
pa<-ggplot(startstop,mapping=aes(x=X,y=Y,group=Track))+
  facet_grid(Embryo~genotype)+
  geom_path(arrow=arrow(type="closed", length=unit(1,"mm")), #lenngth adjusts size of arrowheads 
            aes(color=Somite))+ # geom_line does not get the arrows in the correct order but geom_path does ... confirm it is correct for all
  theme_track+
#  theme(panel.background = element_rect(fill = 'black', colour = 'black'))+ # black background is good for powerpoint presentations but not for print
  ggtitle("Displacement Directionality")

pa

ss2<-startstop%>%unite(combo, c(Embryo, Track))
pa2<-ggplot(ss2,mapping=aes(x=X,y=Y,group=combo))+
  facet_grid(~genotype)+
  geom_path(arrow=arrow(type="closed", length=unit(1,"mm")), #lenngth adjusts size of arrowheads 
            aes(color=Somite))+ # geom_line does not get the arrows in the correct order but geom_path does ... confirm it is correct for all
  theme_track+
#  theme(panel.background = element_rect(fill = 'black', colour = 'black'))+ # black background is good for powerpoint presentations but not for print
  ggtitle("Displacement Directionality")


#for my personal use, do cells end up above or below
p<-ggplot(startstop,mapping=aes(x=X,y=Y,group=Track))+
  facet_wrap(~genotype+Embryo)+geom_point(aes(color=Time))

#displacememnt directionality subtracted COM

p.COM<-startstop%>%
  left_join(geometric.center, by=c("Embryo","Time","genotype"))%>%
  mutate(Xc=X-centerX,
         Yc=Y-centerY)%>%
  ggplot(aes(Xc,Yc, group=Track))+
  geom_path(arrow=arrow(type="closed", length=unit(1,"mm")),  
            aes(color=Somite))+ 
  theme_track+
  facet_grid(genotype~Embryo)+
  ggtitle("Displacement corrected for COM")

p.COM.AP<-startstop%>%
  left_join(geometric.center, by=c("Embryo","Time","genotype"))%>%
  mutate(Xc=X-centerX)%>%
  ggplot(aes(Xc,Y, group=Track))+
  geom_path(arrow=arrow(type="closed", length=unit(1,"mm")),  
            aes(color=Somite))+ 
  theme_track+
  facet_grid(Embryo~genotype)+
  ggtitle("Displacement corrected for COM AP only") # I'm almost certain that Qiyan corrected for COM only along the AP axis, because the displacement graphs get really weird if you correct for COM of the ML as well, because of the tissue-wise ML migration of the finbud in wt/Tbx5a


```

I think that this chart very clearly shows the difference in displacement in some of these cells. 

### Polar Plots and Rose diagrams


```{r polar plot diagrams}

#### not corrected for the center of gravity ####
a<-filter(startstop,Time==1)
b<-filter(startstop,Time==40)

rose<-mutate(a,displacement=sqrt((X-b$X)^2+(Y-b$Y)^2))%>% #calculate displacement
  mutate(slope=(b$Y-Y)/(b$X-X))%>% #calculate slope
  mutate(angle=atan(slope))%>%
  select(-X,-Y,-slope,-Time)

#summarise by somite and genotype so tha I can graph it
rose2<- plyr::ddply(rose, c("Somite","genotype"), summarise,
                    N    = length(displacement),
                    mean.disp = mean(displacement),
                    mean.ang=mean.circular(angle),
                    angle_b  = 180*(mean.ang )/pi) 

p<-ggplot(rose2, aes(x=angle_b,y=mean.disp))+
  geom_bar(stat="identity",aes(color=Somite)) + 
  scale_color_gradientn(colors=rainbow(5))+ 
  scale_x_continuous(breaks=c(-90,0,90,180), limits=c(-180,180)) + 
  coord_polar(direction=-1,start=pi/2)+facet_grid(~genotype)+ #need to offset the starting point bc otherwise it starts at 12:00
  labs(title="Track migration angle",
       subtitle="no correction for COM",
       x="angle",y="displacement")

p


#this works! thank you to stackoverflow!

#although still does not quite match Qiyan's data


```

Several comments.

* These are not true rose diagrams. The length of the bar does not represent the quantity of frequency of tracks in that direction, but rather the average displacement value for that somite. It's really just a bar graph with polar coordinates.
* This does not correlate directly to Qiyan's "rose diagrams", although the relative positions of each somite position to each other does correlate. I believe that this is a more accurate way of representing this data. If you look at the plot in the previous section labelled dispalcement directionality, you can see that these angles correspond with the angles of displacement of the above tracks
* I believe that Qiyan's rose diagrams are not actually what they say they are. I think that they made actually be plots where you subtract the AP values only from the goemetric center of mass and measure the resulting angles. See attached graph below. However, as you will see below, this is misleading for comparing Tbx5b and double deficient embryos, because it gives the impression that these embryos as primarily migrating at 90 degree angles while in fact they are primarily moving along the AP/ X axis. (See below)


```{r polar plot diagrams corrected for geometric center}

#close but still not quite right

#use the previously generated dataframe corrected by geometric center per somite, as above
a<-filter(mean.pos.som,Time==1)
b<-filter(mean.pos.som, Time==40)

rose.center<-mutate(a,displacement=sqrt((finalX-b$finalX)^2+(finalY-b$finalY)^2))%>% #calculate displacement
  mutate(slope=(b$finalY-finalY)/(b$finalX-finalX))%>% #calculate slope
  mutate(angle=atan(slope))%>%
  select(Embryo,genotype,Somite, E.S,displacement,angle)

#summarise by somite and genotype so tha I can graph it
rose2.center<- plyr::ddply(rose, c( "Somite","genotype"), summarise,
               N    = length(displacement),
               mean.disp = mean(displacement),
               sd.disp   = sd(displacement),
               se.disp   = sd.disp / sqrt(N),
               mean.ang = mean(angle),
               sd.ang   = sd(angle),
               se.ang   = sd.ang / sqrt(N),
               angle_b  = 180*(mean.ang )/pi+90+45) #need to add the 90 to get rid of the negative angles

p2<-ggplot(rose2.center, aes(x=angle_b,y=mean.disp))+
  geom_bar(stat="identity",aes(color=Somite),width=1) + 
  scale_color_gradientn(colors=rainbow(5))+ 
  scale_x_continuous(breaks=c(0,90,180,270), limits=c(0,360),
                     labels=c(270,0,90,180)) + #relabel so that it matches me adjusted angles
  coord_polar(direction=1)+
  ggtitle("Rose diagram, corrected COM")+
  facet_grid(~genotype) #+geom_text(aes(label=Somite),hjust=.3,vjust=0.3)


# rose diagrams where I plot one ine per embryo #####

rose2.center<- plyr::ddply(rose, c( "Somite","genotype","Embryo"), summarise,
               N    = length(displacement),
               mean.disp = mean(displacement),
               sd.disp   = sd(displacement),
               se.disp   = sd.disp / sqrt(N),
               mean.ang = mean(angle),
               sd.ang   = sd(angle),
               se.ang   = sd.ang / sqrt(N),
               angle_b  = 180*(mean.ang )/pi+180) #need to add the 90 to get rid of the negative angles

p3<-ggplot(rose2.center, aes(x=angle_b,y=mean.disp))+
  geom_bar(stat="identity",aes(color=Somite),width=1) + 
  scale_color_gradientn(colors=rainbow(5))+ 
  scale_x_continuous(breaks=c(0,90,180,270), limits=c(0,360),
                     labels=c(270,0,90,180)) + #relabel so that it matches me adjusted angles
  coord_polar(direction=1)+
  ggtitle("1 line per embryo")+
  facet_grid(~genotype) #+geom_text(aes(label=Somite),hjust=.3,vjust=0.3)

# rose where I only subtract the AP/X values from the COM  

disp.angles<-startstop%>%
  left_join(geometric.center, by=c("Embryo","Time","genotype"))%>%
  mutate(Xc=X-centerX)%>%
  select(-X,-centerX,-centerY) %>%
  melt(idvar=Time,measure.vars=c("Xc","Y"))%>% # spread X and Y values across the whole Embryo
  unite(temp,Time,variable)%>%
  spread(temp,value)%>%
  rename(X.1="1_Xc",X.40="40_Xc",Y.1="1_Y",Y.40="40_Y") %>% #rename so that they don't start w a #
  mutate(dispX=X.40-X.1, 
         dispY=Y.40-Y.1, 
         disp=sqrt(dispX^2+dispY^2))%>%
  mutate(ang=ifelse(dispY<0, # for angles between -90 and 90
                    asin(dispX/disp)*180/pi,
                    ifelse(dispX>0, 
                           asin(dispX/disp)*180/pi+90,# if x positive, add the angle + 90
                           asin(dispX/disp)*180/pi-90))) # if x negative, angle is -, add -90


p<-ggplot(disp.angles, aes(ang))+geom_histogram() # test to see range

m.disp.angles<-disp.angles%>%
  group_by(genotype, Somite)%>%
  mutate(m.ang=mean(ang), m.disp=mean(disp))%>%
  select(genotype, Somite, m.ang, m.disp)%>%
  unique
  
  
  #plot # polar plots
ggplot(m.disp.angles,aes(x=m.ang,y=m.disp))+
  geom_bar(stat="identity",aes(color=Somite),width=0.01) + 
  scale_color_gradientn(colors=rainbow(5))+ 
  scale_x_continuous(breaks=c(-90,0,90,180), limits=c(-180,180)) + #relabel so that it matches me adjusted angles
  coord_polar(direction=-1,start=2*pi)+
  facet_grid(~genotype)+
  labs(title="Displacement corrected by AP center of Mass only",
       subtitle="COM of the LF only",
       x="angle",
       y="Displacement")
          

m2<-disp.angles%>%
  group_by(Embryo, Somite, genotype)%>%
  mutate(m.ang=mean(ang), m.disp=mean(disp))%>%
  select(Embryo, Somite,genotype, m.ang, m.disp)%>%
  unique
p<-ggplot(m2,aes(x=m.ang,y=m.disp))+
  geom_bar(stat="identity",aes(color=Somite), width=1) + 
  scale_color_gradientn(colors=rainbow(5))+ 
  scale_x_continuous(breaks=c(-90,0,90,180), limits=c(-180,180)) + #relabel so that it matches me adjusted angles
  coord_polar(direction=-1,start=2*pi)+
  facet_grid(Embryo~genotype)+
  labs(title="Displacement corrected by AP center of Mass only",
       subtitle="COM of the LF only",
       x="angle",
       y="Displacement")
 
########## not what I want ######

 
a<-filter(mean.mean,Time==1) # dataset by somite corrected for Center of gravity
b<-filter(mean.mean, Time==40)

rose.mean.center<-mutate(a,
                         displacement=sqrt((finalX-b$finalX)^2+(finalY-b$finalY)^2))%>% #calculate displacement
  mutate(slope=(b$finalY-finalY)/(b$finalX-finalX))%>% #calculate slope
  mutate(angle=atan(slope),
         deg.angle=180*angle/pi)%>%
  select(genotype,Somite,displacement,angle,deg.angle)%>%
  ggplot(aes(x=deg.angle,y=displacement))+
  geom_bar(stat="identity",aes(color=Somite)) + 
  scale_color_gradientn(colors=rainbow(5))+ 
scale_x_continuous(breaks=c(-90,0,90,180), limits=c(-180,180)) + 
  coord_polar(direction=-1,start=pi/2)+
  facet_grid(~genotype)+ labs(title="Displacement corrected by COM",
       subtitle="both AP and ML corrections",
       x="angle",
       y="Displacement")


##### actual rose diagrams ########

rose.diagram<-list(geom_histogram(),
                   scale_x_continuous(breaks=c(0,90, 180, 270, 360), 
                                      limits=c(-180, 180)),
                   coord_polar(start=pi/2, direction =-1),
                   xlab("Angle (degrees)"))

ggplot(rose, aes(angle*180/pi))+
  rose.diagram+
  facet_grid(Somite~genotype)+
  ggtitle("No correction for center of mass")

#ggplot(rose.center, aes(angle*180/pi))+
#  rose.diagram+
#  facet_grid(Somite~genotype)


# what if I correct for COM--AP only

a=filter(disp.angles, Somite<5)
a$Somite<-as.factor(a$Somite)


scale2=c("red","gold","green","blue")

p<-ggplot(a, aes(ang))+
  rose.diagram+
  coord_polar(start=2*pi, direction =-1)


p<-ggplot(a, aes(x=ang, fill=Somite))+
  geom_histogram(position="identity", alpha=0.5)+
  scale_x_continuous(breaks=c(0,90, 180, 270, 360), 
                     limits=c(-180, 180))+
  coord_polar(start=2*pi, direction =-1)+
  scale_fill_manual(values=scale2)+
  facet_grid(~genotype) # this one is colored

p+facet_grid(genotype~Somite)
p+facet_grid(~genotype)


```

```{r circular stats}

# first we have to break things up by genotype, then by somite

quicksort<-function(genotype){
  a<-filter(rose, genotype==genotype)%>%
  select(Somite, angle)
  all<-a$angle
  b<-a%>%
    mutate(i=row_number())%>%
    spread(Somite,angle)
  s1<-b$"1"%>%na.omit
  s2<-b$"2"%>%na.omit
  s3<-b$"3"%>%na.omit
  s4<-b$"4"%>%na.omit
  return(list(s1,s2,s3,s4))
}

quickall<-function(geno){
  a<-filter(rose, genotype==geno)%>%
    filter(Somite<5)
  b<-a$angle
  return(b)
}


# is there a significance between the mean angle of any of the somite groups
watson.wheeler.test(quicksort(wt))
watson.wheeler.test(quicksort(Tbx5a))
watson.wheeler.test(quicksort(Tbx5b))  
watson.wheeler.test(quicksort(double))

#yes for all

# uniformity

watson.test(quickall("wt"),dist="uniform") 
watson.test(quickall("Tbx5a"),dist="uniform")
watson.test(quickall("Tbx5b"),dist="uniform")
watson.test(quickall("double"),dist="uniform")

# null hypothesis is that dist is uniform so if p<0.05, then it rejects null hypothesis

a<-quicksort(double)

#yes for all

```

These are proper rose diagrams, displaying the average angles of migration per embryo. 

## Speed--Average and Instantaneous

I'm pretty sure that I mean velocity by these calculations

Instantaneous= speed between the two time points
- need to create one per track and then plot the combined/average for all tracks
Average =average of all instantaneous speeds


```{r insta.speed, echo=FALSE, fig.height=3}

speed.sum<-function(df){ 
  b<-speedfun(df)
  a<- plyr::ddply(b, c( "Time"), summarise,
               N    = length(insta.speed),
               mean = mean(insta.speed),
               sd   = sd(insta.speed),
               se   = sd / sqrt(N))
 return(a)
}

speed.wt<-speed.sum(wt)
speed.5a<-speed.sum(Tbx5a)
speed.5b<-speed.sum(Tbx5b.40.s1.s5) #let's look at it just within the same boundaries first
speed.doub<-speed.sum(double.40.s1.s5)

speed.all<-assign.geno.all(speed.wt, speed.5a,speed.5b,speed.doub)
rm(speed.wt,speed.5a,speed.5b,speed.doub)

ps<-ggplot(data=speed.all,mapping=aes(x=Time,
                                    y=mean))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se,color=genotype))+
  ggtitle("Average speed over time")+
  ylab("Speed")+
  theme_stat

## for presenations

a<-filter(speed.all, genotype!="double")
pp<-ggplot(data=a,mapping=aes(x=Time,
                                    y=mean))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se,color=genotype))+
  ggtitle("Average speed over time")+
  ylab("Speed")+
  theme_stat

##### overall speed #####
a<-filter(speed.all, Time<=40)
p<-ggplot(a, aes(genotype,mean))+
  geom_boxplot(aes(color=genotype))+
  scale_color_manual(values=colorscale)+
  ylab("Speed (um/min)")+ggtitle("Mean speed")

f7<-p # for making figures
p.speed<-p # for making figures

grid.arrange(ps,p, nrow=1) 

#for presentations
p<-ggplot(filter(a,genotype!="double"), aes(genotype,mean))+
  geom_boxplot(aes(color=genotype))+
  scale_color_manual(values=colorscale)+
  ylab("Speed")+ggtitle("Mean speed over all time points")


#stats to test if there is a significant difference?

aw<-filter(a, genotype=="wt")
ab<-filter(a, genotype=="Tbx5b")
t.test(aw$mean,ab$mean)
#hmm it looks like there might be a significant difference in speed?

# more complete stats--not an average of an average
sw<-speedfun(wt)
sa<-speedfun(Tbx5a)
sb<-speedfun(Tbx5b.40.s1.s5)
sd<-speedfun(double.40.s1.s5)

sall<-assign.geno.all(sw,sa,sb,sd)

p<-ggplot(sall,aes(genotype,insta.speed))+
  geom_boxplot(aes(color=genotype))+
  scale_color_manual(values=colorscale)+
  ylab("Speed")+ggtitle("Speed over all time points")


t.test(sw$insta.speed,sb$insta.speed)  #still a p value <0.05
t.test(sw$insta.speed,sd$insta.speed) #p<0.05 significant

##### plot speed wrt to starting AP and ML position ######
a<-speedfun(wt)
b<-speedfun(Tbx5a)
c<-speedfun(Tbx5b.40.s1.s5)
d<-speedfun(double.40.s1.s5)

speed<-assign.geno.all(a,b,c,d)
rm(a,b,c,d) # clean up


a<-filter(ALL1.5,Time==1)%>%
  select(Embryo, Track, genotype,X,Y)%>%
  rename(startX=X,startY=Y) # starting coordinates
b<-speed%>%
  group_by(genotype, Embryo, Track)%>%
  mutate(avgSpeedTrack=mean(insta.speed))%>% # find average speed for each track
  select(Embryo, Track, Somite, MLpos, genotype, avgSpeedTrack)%>%
  ungroup%>%
  unique
c<-left_join(b,a,by=c("Embryo", "Track", "genotype"))%>%
  group_by(genotype)%>% ### now we are calculating the average displacement based on starting values
  mutate(binX=ntile(startX,25))%>% # divide x axis
  mutate(binY=ntile(startY,20))%>% # divide y axis
  ungroup%>%
  group_by(genotype,binX)%>%
  mutate(APspeed=mean(avgSpeedTrack), # speed grouped by AP
         ciAP=ci(avgSpeedTrack),
         avgstartX=mean(startX))%>% 
  ungroup%>%
  group_by(genotype,binY)%>%
  mutate(MLspeed=mean(avgSpeedTrack),#speed grouped by ML
         ciML=ci(avgSpeedTrack),
         avgstartY=mean(startY)) 


p1<-ggplot(c, aes(startX,avgSpeedTrack))+
  geom_ribbon(aes(avgstartX,ymin=APspeed-ciAP,ymax=APspeed+ciAP))+
  geom_point(aes(color=Somite),alpha=0.1)+
  scale_color_gradientn(colors=rainbow(5))+
  facet_grid(~genotype)+
  labs(title="Speed by starting AP position",
       x="starting AP position",
       y="Speed (um/min)")+
  geom_line(mapping=aes(avgstartX,APspeed),size=1) #plot the average valies as a line

p.speed.AP<-p1 # for making figures

p2<-ggplot(c, aes(startY,avgSpeedTrack))+
  geom_ribbon(aes(avgstartY,ymin=MLspeed-ciML,ymax=MLspeed+ciML))+
  geom_point(aes(color=MLpos),alpha=0.1)+
  scale_color_gradientn(colors=rainbow(5))+
  facet_grid(~genotype)+
  labs(title="Speed by starting ML position",
       x="starting ML position",
       y="Speed (um/min)")+
  geom_line(mapping=aes(avgstartY,MLspeed),size=1) #plot the average valies as a line

p.speed.ML<-p2 # for making figures
p1
p2

```


## Persistance

How effective are the embryos at reaching their final destination. (Or how directed is their movement?)

```{r global persistance, echo=FALSE,fig.height=5}

persistance.wt<-persistance.fun(wt)
persistance.5b<-persistance.fun(Tbx5b.40.s1.s5)
persistance.5a<-persistance.fun(Tbx5a)
persistance.doub<-persistance.fun(double.40.s1.s5)
persistance.all<-assign.geno.all(persistance.wt,persistance.5a,persistance.5b,persistance.doub)

p.sum<-function(df){ #sometimes this still throws NAs but whatever
 a<- plyr::ddply(df, c( "Time"), summarise,
               N    = length(persistance),
               mean = mean(persistance),
               sd   = sd(persistance),
               se   = sd / sqrt(N))
 return(a)
}

p.wt<-p.sum(persistance.wt)
p.5a<-p.sum(persistance.5a)
p.5b<-p.sum(persistance.5b)
p.doub<-p.sum(persistance.doub)

p.all<-assign.geno.all(p.wt,p.5a,p.5b,p.doub)
```


```{r persistance plots, echo=FALSE,fig.height=2}
p<-ggplot(data=p.all,mapping=aes(x=Time,
                             y=mean))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se,color=genotype))+
  theme_stat+
  ggtitle("Persistance")+
  ylab("persistance")


p.persistance<-p

# wonder what the big diffence in total track length is?

#no double for presentations
a<-filter(p.all, genotype!="double")
pd<-ggplot(data=a,mapping=aes(x=Time,
                             y=mean))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se,color=genotype))+
  theme_stat+
  ggtitle("Persistance over time")+
  ylab("Persistance")


#### persistance compared to starting AP and ML position #####
a<-filter(ALL1.5,Time==1)%>%
  select(Embryo, Track, genotype, X,Y)%>% #make index
  rename(startX=X,startY=Y)
b<-filter(persistance.all, Time==40)
c<-left_join(b,a,by=c("Embryo","Track","genotype"))%>%
  group_by(genotype)%>% ### now we are calculating the average displacement based on starting values ### haven't done this yet
  mutate(binX=ntile(startX,25))%>% # divide x axis
  mutate(binY=ntile(startY,20))%>% # divide y axis
  ungroup%>%
  group_by(genotype,binX)%>%
  mutate(AP.persistance=mean(persistance),
         ciAP=ci(persistance),
         avgstartX=mean(startX))%>% # speed grouped by AP
  ungroup%>%
  group_by(genotype,binY)%>%
  mutate(ML.persistance=mean(persistance),
         ciML=ci(persistance),
         avgstartY=mean(startY)) #speed grouped by ML

p1<-ggplot(c, aes(startX,persistance))+
  geom_ribbon(aes(avgstartX,ymin=AP.persistance-ciAP,ymax=AP.persistance+ciAP))+
  geom_point(aes(color=Somite),alpha=0.1)+
  scale_color_gradientn(colors=rainbow(5))+
  facet_grid(~genotype)+
  geom_line(mapping=aes(avgstartX,AP.persistance), size=1)+ # add average
  labs(title="Persistance by starting AP position",
       x="AP position", y="Persistance")
p.pers.AP<-p1 #thesis figs


p2<-ggplot(c, aes(startY,persistance))+
  geom_ribbon(aes(avgstartY,ymin=ML.persistance-ciML,ymax=ML.persistance+ciML))+
  geom_point(aes(color=MLpos),alpha=0.1)+
  scale_color_gradientn(colors=rainbow(5))+
  facet_grid(~genotype)+
  geom_line(mapping=aes(avgstartY,ML.persistance), size=1)+
  ggtitle("Persistance by starting ML position")
p.pers.ML<-p2 # thesis figs

p
p1
p2
```

Looks like the Tbx5b morphants are less persistant that the wt


##Total track length

```{r total track lengths, fig.height=2}
#totally already calculated these in my persistance function

sum.dist.sum<-function(df){ #sometimes this still throws NAs but whatever
 b<-tracklength(df)
  a<- plyr::ddply(b, c( "Time"), summarise,
               N    = length(track.length),
               mean = mean(track.length),
               sd   = sd(track.length),
               se   = sd / sqrt(N))
 return(a)
}

d.wt<-sum.dist.sum(persistance.wt)
d.5a<-sum.dist.sum(persistance.5a)
d.5b<-sum.dist.sum(persistance.5b)
d.doub<-sum.dist.sum(persistance.doub)

tracklength<-assign.geno.all(d.wt,d.5a,d.5b, d.doub) # i feel like there is a quicker way to do this. oh well


p<-ggplot(data=tracklength,mapping=aes(x=Time,
                                        y=mean,
                                       group=genotype))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se,color=genotype))+
  theme_stat+
  ggtitle("Total Track Length over time")+
  ylab("Total track length")
p

a<-filter(persistance.all, Time==40)
p<-ggplot(a, aes(genotype, track.length))+
  geom_boxplot(aes(color=genotype))+
  scale_color_manual(values=colorscale)+
  ylab("Tracklength (um)")+ggtitle("Average tracklength")
f8<-p
p.tracklength<-p # thesis figs

#no double for presentations
a<-filter(tracklength, genotype!="double")
pa<-ggplot(data=a,mapping=aes(x=Time,
                                        y=mean,
                                       group=genotype))+
  geom_point(aes(color=genotype))+
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se,color=genotype))+
  theme_stat+
  ggtitle("Total Track Length over time")+
  ylab("Total track length")


### stats for tracklength ####

a<-filter(persistance.all, Time==40)%>%
  select(genotype, track.length)


a.wt<-filter(a, genotype=="wt")$track.length
a.5b<-filter(a,genotype=="Tbx5b")$track.length
a.doub<-filter(a, genotype=="double")$track.length

t.test(a.wt, a.5b)
t.test(a.wt, a.doub)

a.t<-aov(track.length~genotype, data=a)
summary(a.t)
TukeyHSD(a.t)
```


```{r track length by somite}
a<-tracklength.somite(persistance.wt)
b<-tracklength.somite(persistance.5a)
c<-tracklength.somite(persistance.5b)
d<-tracklength.somite(persistance.doub)

track.length.by.somite<-assign.geno.all(a,b,c,d)
rm(a,b,c,d) #cleanup

lengthfinal<-filter(track.length.by.somite,Time==40)

pS<-ggplot(lengthfinal,aes(x=Somite, 
                          y=mean,
                          group=genotype))+
  scale_fill_gradientn(colors=rainbow(5))+
  geom_bar(stat="identity",aes(fill=Somite))+ #otherwise it just does it by count
  facet_grid(~genotype)+
  ylab("track length")+
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se))+ggtitle("Final track length by somite")


#### total track length by starting AP and ML position #########

a<-filter(ALL1.5,Time==1)%>%
  select(Embryo, Track, genotype, X,Y)%>% #make index
  rename(startX=X,startY=Y)
b<-filter(persistance.all, Time==40)
c<-left_join(b,a,by=c("Embryo","Track","genotype"))%>%
  group_by(genotype)%>% ### now we are calculating the average displacement based on starting values ### haven't done this yet
  mutate(binX=ntile(startX,25))%>% # divide x axis
  mutate(binY=ntile(startY,20))%>% # divide y axis
  ungroup%>%
  group_by(genotype,binX)%>%
  mutate(dist.AP=mean(track.length),
         ciAP=ci(track.length),
         avgstartX=mean(startX))%>% # distance by starting AP
  ungroup%>%
  group_by(genotype,binY)%>%
  mutate(dist.ML=mean(track.length),
         ciML=ci(track.length),
         avgstartY=mean(startY)) # distance by starting ML

p1<-ggplot(c,aes(startX,track.length))+
  geom_ribbon(aes(avgstartX,ymin=dist.AP-ciAP,ymax=dist.AP+ciAP))+
  geom_point(aes(color=Somite), alpha=0.1)+
  facet_grid(~genotype)+
  scale_color_gradientn(colors=rainbow(7))+
  geom_line(mapping=aes(avgstartX,dist.AP), size=1)+
  labs(title="Track length by starting AP position",
       x="starting AP position", y="total track length (um)")

p.track.AP<-p1 # thesis fig
p2<-ggplot(c,aes(startY,track.length))+
  geom_ribbon(aes(avgstartY,ymin=dist.ML-ciML,ymax=dist.ML+ciML))+
  geom_point(aes(color=MLpos), alpha=0.1)+
  facet_grid(~genotype)+
  scale_color_gradientn(colors=rainbow(5))+
  geom_line(mapping=aes(avgstartY,dist.ML), size=1)+
  labs(title="Track length by starting ML position",
       x="starting ML position", y="total track length")

p.track.ML<-p2 #thesis fig


# track length by ML pos

pc<-c%>%
  group_by(genotype, MLpos)%>%
  summarize(md=mean(track.length),
            se=sd(track.length)/sqrt(length(track.length)))%>%
  ggplot(aes(x=MLpos,
                y=md))+
  scale_fill_gradientn(colors=rainbow(7))+
  geom_bar(stat="identity",aes(fill=MLpos))+ #otherwise it just does it by count
  facet_grid(~genotype)+
  ylab("track length (um)")+
  geom_errorbar(aes(ymin=md-se,ymax=md+se))+ggtitle("Final track length by ML position")


grid.arrange(pS,pc,p1,p2,nrow=2)


##### track ML movement ####

# why is the track length for somites 1,2 and 4 so much greater than wt?


a<-filter(ALL1.5, Somite==1)%>%
  ggplot(aes(Time,Y, group=interaction(Embryo, Track)))+
  geom_line(aes(color=Track))+
  facet_grid(genotype~Embryo)+xlim(0,40)

bb<-filter(ALL1.5, Somite==1)%>%
  ggplot(aes(Time,X, group=interaction(Embryo, Track)))+
  geom_line(aes(color=Track))+
  facet_grid(genotype~Embryo)+xlim(0,40)
```

In general, in looks like the cells on either edge of the limb field are the ones with the really long track length leading to the decrease in persistance. The cells in somite 3 seem to have pretty equivalent track lengths to what we see in either wt or Tbx5a deficient embryos.

## log Mean Square Displacement


* measure of the deviation of the position of a particle wrt a ref positon over time

alpha value = slope of log MSD/log(time) index for directional persistance, 1 for randomly moving cells and 2 for cells that move in a straight manner


```{r log MSD, echo=FALSE, fig.height=2}
lMSD.wt<-logMSD(LF.wt)
lMSD.5a<-logMSD(LF.5a)
lMSD.5b<-logMSD(LF.5b)
lMSD.doub<-logMSD(LF.doub)

lMSD.all<-assign.geno.all(lMSD.wt,lMSD.5a,lMSD.5b,lMSD.doub)%>%
  filter(logMSD!=-Inf)

p1<-ggplot(lMSD.all,aes(log10(Time), logMSD))+
  geom_point(aes(color=genotype))+
  theme_stat+xlim(0,2)+
  xlab("log(Time)")+
  ggtitle("lMSD vs Time")

f9<-p1

a.wt<-alpha.value(lMSD.wt)
a.5a<-alpha.value(lMSD.5a)
a.5b<-alpha.value(lMSD.5b)
a.doub<-alpha.value(lMSD.doub)

alphas<-assign.geno.all(a.wt, a.5a,a.5b, a.doub)%>%
  rename(alpha.v=a.logTime)

p2<-ggplot(alphas, aes(genotype, alpha.v))+
  geom_point(aes(color=genotype))+
  scale_color_manual(values=colorscale)+
  labs(title="Alpha values", y="alpha")+
  ylim(1,2)

grid.arrange(p1,p2, nrow=1, widths=2:1)

alphas

```
```{r make figures for pub, include=FALSE}

#ggarrange(f7, f8, f5, f6,f9, nrow=1, common.legend=TRUE)

grid.arrange(f7, f8, f5, f6,f9, f3, f4)

grid.arrange(f1, f2)


```

# References

* Useful ggplot2 tutorial  https://crerecombinase.github.io/r-intermediate-altmetrics/15-ggplot2-aes.html
* loops with file names https://stackoverflow.com/questions/4854130/how-to-iterate-over-file-names-in-a-r-script
* grid arrange https://cran.r-project.org/web/packages/egg/vignettes/Ecosystem.html
* I took the formula for the function summarySE from this website http://www.cookbook-r.com/Manipulating_data/Summarizing_data/ but that didn't actually work, but I still took the code for summarizing from there
* also for error bars on ggplots http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper functions