#! /bin/sh

#!/usr/bin/tclsh

#
# Copyright (C) 2001 by USC/ISI
# All rights reserved.
#
# Redistribution and use in source and binary forms are permitted
# provided that the above copyright notice and this paragraph are
# duplicated in all such forms and that any documentation, advertising
# materials, and other materials related to such distribution and use
# acknowledge that the software was developed by the University of
# Southern California, Information Sciences Institute.  The name of the
# University may not be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# ModelGen is a set of scripts that take a tcpdump trace as input and
# output a set of CDF files that model Web traffic. It also outputs
# a time series of traffic size (in 1 ms blocks) for further wavelet
# scaling analysis.
#
#             usage:
#                  ./ModelGen <tcpdump trace> <threshold> <network prefix>
#
#                  <tcpdump trace> : tcpdump trace file generated using
#                                    tcpdump -w option
#                  <threshold>     : the threshold time value (in milliseconds)
#                                    that distinguishes idle periods in order 
#                                    to infer user "think" times between 
#                                    requests for new top-level pages.
#                  <network prefix>: network prefix used to distinguish
#                                    inbound vs. outbound traffic
#
#             example: ./ModelGen tracefile 1000 128.9
#
# Note that it has currently only been tested on Linux systems.
#
# This work is supported by DARPA through SAMAN Project
# (http://www.isi.edu/saman/), administered by the Space and Naval
# Warfare System Center San Diego under Contract No. N66001-00-C-8066
#
#
#

# Stage 1: decode the saved tcpdump capture ($1) into the text files used by
# the rest of the script:
#   $1.all       every packet (terse -q output)
#   $1.www       TCP packets to or from port 80
#   $1.ftp1      packets matching "port ftp"
#   $1.ftp2      output of getFTPclient.pl run over $1.ftp1
#   $1.ftp       output of getFTP.pl run over $1.all, given $1.ftp2 via -r
#   $1.http-srv  TCP packets whose source port is 80
# tcpdump flags: -n no name lookups, -tt epoch-seconds timestamps,
# -q terse output, -r read packets from a saved file.

# All three arguments are required: with an empty $1 the redirections below
# would write to ".all", ".www", ..., and $3 is passed later as the value
# of the -s option.
if [ "$#" -lt 3 ]; then
    echo "usage: $0 <tcpdump trace> <threshold> <network prefix>" >&2
    exit 1
fi

date
echo "***  parsing tcpdump file  ***"
echo "***  parsing .all  ***"
./tcpdump -n -tt -q -r "$1" > "$1.all"
date
echo "***  parsing .www  ***"
./tcpdump -n -tt -r "$1" tcp port 80 > "$1.www"
#./tcpdump -n -tt -r $1 port ftp or port ftp-data > $1.ftp
date
echo "***  parsing .ftp  ***"
./tcpdump -n -tt -r "$1" port ftp > "$1.ftp1"
getFTPclient.pl < "$1.ftp1" > "$1.ftp2"
getFTP.pl -r "$1.ftp2" < "$1.all" > "$1.ftp"
#./tcpdump -q -n -tt -r $1 port ftp or port ftp-data > $1.ftps
#./tcpdump -n -tt -r $1 port domain > $1.dns
date
echo "***  parsing .http-srv  ***"
./tcpdump -n -tt -r "$1" tcp src port 80 > "$1.http-srv"

# Stage 2: traffic-mix analysis, flow statistics and inbound/outbound
# separation.  Most of this stage is disabled (commented out); the banners
# are still printed, and the only command that runs is the io.www.pl call
# on $1.http-srv in the "WWW" step.

date
echo "***  analyze traffic mix  ***"
# Disabled: traffic-mix classification.
#cat $1.all | io.pl -s $3  -w $1.all
#cat $1.all.inbound | traffic-classify > $1.traffic.cnt.inbound
#cat $1.all.outbound | traffic-classify > $1.traffic.cnt.outbound

date
echo "***  analyze flow statistics  ***"
# Disabled: per-flow size / duration / arrival CDFs.
#awk -f flow.awk < $1.all.outbound > $1.all.outbound.flow
#awk -f flow.awk < $1.all.inbound > $1.all.inbound.flow
#sort -s -o $1.all.outbound.flow.sort -T /tmp $1.all.outbound.flow
#sort -s -o $1.all.inbound.flow.sort -T /tmp $1.all.inbound.flow
#cat $1.all.outbound.flow.sort | flow.pl -w $1.outbound.flow
#cat $1.all.inbound.flow.sort | flow.pl -w $1.inbound.flow
#sort -s -o $1.inbound.flow.start.sort $1.inbound.flow.start
#sort -s -o $1.outbound.flow.start.sort $1.outbound.flow.start
#awk -f arrive2inter.awk < $1.outbound.flow.start.sort > $1.outbound.flow.arrival
#awk -f arrive2inter.awk < $1.inbound.flow.start.sort > $1.inbound.flow.arrival
#dat2cdf -e 1024 -i 1024 -d 1024 -t $1.outbound.flow.size
#dat2cdf -e 1024 -i 1024 -d 1024 -t $1.inbound.flow.size
#dat2cdf -e 0 -i 1 -d 1 -t $1.outbound.flow.dur
#dat2cdf -e 0 -i 1 -d 1 -t $1.inbound.flow.dur
#dat2cdf -e 0 -i 0.001 -d 1 -t $1.outbound.flow.arrival
#dat2cdf -e 0 -i 0.001 -d 1 -t $1.inbound.flow.arrival

date
echo "***  seperate Inbound and Outbound traffic  ***"
echo "DNS"
#io.tcl $1.dns

date
echo "WWW"
#cat $1.www | io.www.pl -s $3  -w $1.www
# $3 is the network prefix; $1.http-srv is both the input (on stdin) and the
# -w argument.  Later stages read $1.http-srv.inbound / $1.http-srv.outbound,
# presumably written by io.www.pl from that -w base name (confirm against
# io.www.pl).
io.www.pl -s "$3" -w "$1.http-srv" < "$1.http-srv"

date
echo "FTP"
#cat $1.ftp | io.ftp.pl -s $3  -w $1.ftp
#cat $1.ftps | io.ftp.pl -s $3  -w $1.ftps

##############################

# Stage 3a: model inbound HTTP server traffic from $1.http-srv.inbound.
# $2 (the idle threshold) is handed to http_active via -I.

date

# Clear outputs matching these patterns in the current directory before
# regenerating them.  "--" keeps a file name starting with "-" from being
# parsed as an option.
/bin/rm -rf -- *.time-series
/bin/rm -rf -- *connect.time*

echo "***  Analyze Inbound traffic  ***"
echo "run http_connect"
# Stable sort on field 2, then field 4, then field 1.  This is the standard
# -k spelling of the obsolescent "+1 -2 +3 -4 +0 -1" key syntax.
sort -s -T /tmp -k 2,2 -k 4,4 -k 1,1 \
    -o "$1.in.http-srv-sort" "$1.http-srv.inbound"
http_connect -r "$1.in.http-srv-sort" -w "$1.in.http-srv.connect"
grep "ACT" "$1.in.http-srv.connect" > "$1.in.http-srv.connect.time"
sort "$1.in.http-srv.connect.time" > "$1.in.http-srv.connect.time.sort"


date
echo "run http_active"
# Stable sort on field 2, then field 1 (was "+1 -2 +0 -1").
sort -s -T /tmp -k 2,2 -k 1,1 \
    -o "$1.in.http-srv.connect.sort" "$1.in.http-srv.connect"
http_active -r "$1.in.http-srv.connect.sort" -w "$1.in.http-srv.active" -I "$2"


date
echo "compute CDF statistics"
outputCDF -e inbound < "$1.in.http-srv.active.activity"


date
echo "compute time series (1ms block)"
# The next command reads $1.http-srv.inbound.bw, presumably produced by
# bw.tcl from its argument (confirm against bw.tcl).
bw.tcl "$1.http-srv.inbound"
time-series.pl < "$1.http-srv.inbound.bw" > "$1.in.time-series"

##############################

# Stage 3b: model outbound HTTP server traffic from $1.http-srv.outbound.
# Same steps as the inbound stage, with "out"/"outbound" file names.
# $2 (the idle threshold) is handed to http_active via -I.

date
echo "***  Analyze Outbound traffic  ***"
echo "run http_connect"
# Stable sort on field 2, then field 4, then field 1.  This is the standard
# -k spelling of the obsolescent "+1 -2 +3 -4 +0 -1" key syntax.
sort -s -T /tmp -k 2,2 -k 4,4 -k 1,1 \
    -o "$1.out.http-srv-sort" "$1.http-srv.outbound"
http_connect -r "$1.out.http-srv-sort" -w "$1.out.http-srv.connect"
grep "ACT" "$1.out.http-srv.connect" > "$1.out.http-srv.connect.time"
sort "$1.out.http-srv.connect.time" > "$1.out.http-srv.connect.time.sort"


date
echo "run http_active"
# Stable sort on field 2, then field 1 (was "+1 -2 +0 -1").
sort -s -T /tmp -k 2,2 -k 1,1 \
    -o "$1.out.http-srv.connect.sort" "$1.out.http-srv.connect"
http_active -r "$1.out.http-srv.connect.sort" -w "$1.out.http-srv.active" -I "$2"


date
echo "compute CDF statistics"
outputCDF -e outbound < "$1.out.http-srv.active.activity"


date
echo "compute time series (1ms block)"
# The next command reads $1.http-srv.outbound.bw, presumably produced by
# bw.tcl from its argument (confirm against bw.tcl).
bw.tcl "$1.http-srv.outbound"
time-series.pl < "$1.http-srv.outbound.bw" > "$1.out.time-series"

######################################

# Stage 4a: delay and bandwidth estimation for WWW traffic, reading $1.www.
# NOTE(review): inbound.seq / outbound.seq are not written by any command
# visible in this script; presumably BW-seq.pl creates them in the current
# directory.  They, $1.outbound.pair and the $1.sync* files are scratch
# files that the FTP stage reuses under the same names.
echo "***  Delay and Bandwidth estimation  ***"

echo "WWW traffic"

date
echo "output traffic between web servers and clients"
BW-seq.pl -s "$3" -p 80 < "$1.www"
sort -o inbound.seq.sorted inbound.seq
sort -o outbound.seq.sorted outbound.seq

date
# This message used to be split mid-word across two source lines, which put
# a newline inside the printed text.
echo "search for DATA/ACK packets which have the same seqence number for outbound traffic"
BW-pair.pl < outbound.seq.sorted > "$1.outbound.pair"

date
echo "estimate the bandwidth for inbound/outbound traffic"
BW.out.pl -w "$1.www" < "$1.outbound.pair"
BW.in.pl -w "$1.www" < inbound.seq.sorted

# The .BW / .delay inputs are presumably written by BW.out.pl / BW.in.pl
# from the -w base name (confirm against those scripts).
dat2cdf -e 0 -i 0.001 -d 1 -t "$1.www.outbound.BW"
dat2cdf -e 0 -i 0.001 -d 1 -t "$1.www.inbound.BW"
dat2cdf -e 0 -i 0.001 -d 1 -t "$1.www.outbound.delay"

date
echo "Locate SYN connection"
delay.pl -p 80 < "$1.www" > "$1.sync"
sort -s -T /tmp -o "$1.sync.sorted" "$1.sync"

date
echo "compute delay for each SYN connection pair between servers and clients"
pair.tcl "$1.sync.sorted" > "$1.sync.delay"
sort -s -T /tmp -o "$1.sync.delay.sorted" "$1.sync.delay"
awk -f delay.awk < "$1.sync.delay.sorted" > "$1.www.inbound.delay"

dat2cdf -e 0 -i 0.001 -d 1 -t "$1.www.inbound.delay"

######################################


# Stage 4b: delay and bandwidth estimation for FTP traffic, reading $1.ftp.
# BW-seq.pl is given port 20 and delay.pl port 21.
# NOTE(review): inbound.seq / outbound.seq are not written by any command
# visible in this script; presumably BW-seq.pl creates them in the current
# directory.  They, $1.outbound.pair and the $1.sync* files are the same
# scratch names the WWW stage uses, so this stage overwrites them.
echo "FTP traffic"

date
echo "output traffic between ftp servers and clients"
BW-seq.pl -s "$3" -p 20 < "$1.ftp"
sort -o inbound.seq.sorted inbound.seq
sort -o outbound.seq.sorted outbound.seq

date
# This message used to be split mid-word across two source lines, which put
# a newline inside the printed text.
echo "search for DATA/ACK packets which have the same seqence number for outbound traffic"
BW-pair.pl < outbound.seq.sorted > "$1.outbound.pair"

date
echo "estimate the bandwidth for inbound/outbound traffic"
BW.out.pl -w "$1.ftp" < "$1.outbound.pair"
BW.in.pl -w "$1.ftp" < inbound.seq.sorted

# The .BW / .delay inputs are presumably written by BW.out.pl / BW.in.pl
# from the -w base name (confirm against those scripts).
dat2cdf -e 0 -i 0.001 -d 1 -t "$1.ftp.outbound.BW"
dat2cdf -e 0 -i 0.001 -d 1 -t "$1.ftp.inbound.BW"
dat2cdf -e 0 -i 0.001 -d 1 -t "$1.ftp.outbound.delay"

date
echo "Locate SYN connection"
delay.pl -p 21 < "$1.ftp" > "$1.sync"
sort -s -T /tmp -o "$1.sync.sorted" "$1.sync"

date
echo "compute delay for each SYN connection pair between servers and clients"
pair.tcl "$1.sync.sorted" > "$1.sync.delay"
sort -s -T /tmp -o "$1.sync.delay.sorted" "$1.sync.delay"
awk -f delay.awk < "$1.sync.delay.sorted" > "$1.ftp.inbound.delay"

dat2cdf -e 0 -i 0.001 -d 1 -t "$1.ftp.inbound.delay"

######################################
# Stage 5: FTP statistics (file inter-arrival, size, files per session and
# session inter-arrival CDFs) for each direction.
# NOTE(review): the inputs $1.ftps.outbound / $1.ftps.inbound are only
# produced by commands that are commented out earlier in this script
# (the "ftps" tcpdump and io.ftp.pl lines); confirm they exist before
# relying on this stage.
echo "Output statistics for FTP traffic"
date

# grep -F matches ".20 >" literally.  As a regular expression the "." would
# match any character, so ports such as 1020 or 4320 were selected too.
grep -F ".20 >" "$1.ftps.outbound" | awk -f ftp.awk | sort > "$1.ftp.outbound.conn"
ftp.pl -w "$1.ftp.outbound" < "$1.ftp.outbound.conn"
sort -o "$1.ftp.outbound.sess.arrive.sort" "$1.ftp.outbound.sess.arrive"
grep -F ".20 >" "$1.ftps.inbound" | awk -f ftp.awk | sort > "$1.ftp.inbound.conn"
ftp.pl -w "$1.ftp.inbound" < "$1.ftp.inbound.conn"
sort -o "$1.ftp.inbound.sess.arrive.sort" "$1.ftp.inbound.sess.arrive"

# The .file.inter / .size / .fileno / .sess.arrive inputs are presumably
# written by ftp.pl from its -w base name (confirm against ftp.pl).
dat2cdf -e 0 -i 0.001 -d 1 -t "$1.ftp.outbound.file.inter"
dat2cdf -e 0 -i 1000 -d 1000 -t "$1.ftp.outbound.size"
dat2cdf -e 0 -i 1 -d 1 -t "$1.ftp.outbound.fileno"
dat2cdf -e 0 -i 0.001 -d 1 -t "$1.ftp.inbound.file.inter"
dat2cdf -e 0 -i 1000 -d 1000 -t "$1.ftp.inbound.size"
dat2cdf -e 0 -i 1 -d 1 -t "$1.ftp.inbound.fileno"

awk -f arrive2inter.awk < "$1.ftp.outbound.sess.arrive.sort" > "$1.ftp.outbound.sess.inter"
dat2cdf -e 0 -i 0.01 -d 1 -t "$1.ftp.outbound.sess.inter"
awk -f arrive2inter.awk < "$1.ftp.inbound.sess.arrive.sort" > "$1.ftp.inbound.sess.inter"
dat2cdf -e 0 -i 0.01 -d 1 -t "$1.ftp.inbound.sess.inter"

######################################
# Stage 6: TCP window-size CDFs for WWW (port 80) and FTP (port 20) traffic.
# For each direction, lines containing " S " are split into those sent from
# the port (".PORT >", written to *.svr.win) and those sent to it (".PORT:",
# written to *.clnt.win); win.awk then extracts the values fed to dat2cdf.
# NOTE(review): the inputs $1.www.{in,out}bound and $1.ftp.{in,out}bound are
# only produced by io.www.pl / io.ftp.pl calls that are commented out
# earlier in this script; confirm they exist before relying on this stage.

# The closing quote was missing here, which left the rest of the file inside
# an unterminated string and made the shell stop with a syntax error.
echo "Output TCP window size"
date

echo "WWW"
# grep -F matches the port pattern literally.  As a regular expression the
# leading "." would match any character, so e.g. port 8080 was selected too.
grep " S " "$1.www.outbound" | grep -F ".80 >" > "$1.www.outbound.svr.win"
grep " S " "$1.www.inbound" | grep -F ".80 >" > "$1.www.inbound.svr.win"
grep " S " "$1.www.outbound" | grep -F ".80:" > "$1.www.outbound.clnt.win"
grep " S " "$1.www.inbound" | grep -F ".80:" > "$1.www.inbound.clnt.win"
awk -f win.awk < "$1.www.outbound.svr.win" > "$1.www.outbound.wins"
awk -f win.awk < "$1.www.inbound.svr.win" > "$1.www.inbound.wins"
awk -f win.awk < "$1.www.outbound.clnt.win" > "$1.www.outbound.winc"
awk -f win.awk < "$1.www.inbound.clnt.win" > "$1.www.inbound.winc"
dat2cdf -e 1024 -i 1024 -d 1024 -t "$1.www.outbound.wins"
dat2cdf -e 1024 -i 1024 -d 1024 -t "$1.www.outbound.winc"
dat2cdf -e 1024 -i 1024 -d 1024 -t "$1.www.inbound.wins"
dat2cdf -e 1024 -i 1024 -d 1024 -t "$1.www.inbound.winc"

echo "FTP"
grep " S " "$1.ftp.outbound" | grep -F ".20 >" > "$1.ftp.outbound.svr.win"
grep " S " "$1.ftp.inbound" | grep -F ".20 >" > "$1.ftp.inbound.svr.win"
grep " S " "$1.ftp.outbound" | grep -F ".20:" > "$1.ftp.outbound.clnt.win"
grep " S " "$1.ftp.inbound" | grep -F ".20:" > "$1.ftp.inbound.clnt.win"
awk -f win.awk < "$1.ftp.outbound.svr.win" > "$1.ftp.outbound.wins"
awk -f win.awk < "$1.ftp.inbound.svr.win" > "$1.ftp.inbound.wins"
awk -f win.awk < "$1.ftp.outbound.clnt.win" > "$1.ftp.outbound.winc"
awk -f win.awk < "$1.ftp.inbound.clnt.win" > "$1.ftp.inbound.winc"
dat2cdf -e 1024 -i 1024 -d 1024 -t "$1.ftp.outbound.wins"
dat2cdf -e 1024 -i 1024 -d 1024 -t "$1.ftp.outbound.winc"
dat2cdf -e 1024 -i 1024 -d 1024 -t "$1.ftp.inbound.wins"
dat2cdf -e 1024 -i 1024 -d 1024 -t "$1.ftp.inbound.winc"


date
echo "execution complete"
