dk_interested.bib
@article{Adelberg1998,
  author  = {Brad Adelberg},
  title   = {{NoDoSE}---A Tool for Semi-Automatically Extracting Structured and
    Semistructured Data from Text Documents},
  journal = {SIGMOD Record},
  year    = {1998},
  volume  = {27},
  pages   = {283--294},
}
@techreport{Adelberg1999,
  author      = {Brad Adelberg and Matt Denny},
  title       = {Building Robust Wrappers for Text Sources},
  institution = {Computer Science Department, Northwestern University},
  year        = {1999},
  note        = {Technical Report},
  abstract    = {Many data sources, including web sites, do not support general query
    interfaces. The typical solution is to build a wrapper around the
    source which presents a general query interface to the underlying
    data by translating external queries to a form the local source
    understands, submitting the local query, and then repackaging the
    results in a generic way before returning them to the caller. This
    approach allows heterogeneous query processors to be built on top
    of underlying source over which one ...},
}
@techreport{Ahuja2000,
  author      = {Abha Ahuja and Craig Labovitz and Srinivasan Venkatachary and Roger
    Wattenhofer},
  title       = {The Impact of {Internet} Policy and Topology on Delayed Routing Convergence},
  institution = {Microsoft},
  year        = {2000},
  number      = {MSR-TR-2000-74},
  month       = jul,
  abstract    = {This paper examines the roles of inter-domain topology and routing
    policy in the process of delayed Internet routing convergence. In
    recent work, we found that the Internet lacks effective inter-domain
    path fail-over. Unlike switches in the public telephony network
    which exhibit fail-over on the order of milliseconds, we showed
    Internet backbone routers may take tens of minutes to reach a consistent
    view of the network topology after a fault. In this paper, we expand
    on our earlier work by exploring the impact of specific Internet
    provider policies and topologies on the speed of routing convergence.
    Based on data from the experimental injection and measurement of
    several hundred thousand inter-domain routing faults, we show that
    the time for end-to-end Internet convergence depends on the length
    of the longest possible backup autonomous system path between a
    source and destination node. We also demonstrate significant variation
    in the convergence behaviors of Internet service providers, with
    the larger providers exhibiting the fastest convergence latencies.
    Finally, we discuss possible modifications to BGP and provider routing
    policies which if deployed, would improve inter-domain routing convergence.},
}
@inproceedings{Alon1996,
  author    = {Noga Alon and Yossi Matias and Mario Szegedy},
  title     = {The space complexity of approximating the frequency moments},
  booktitle = {28th ACM Symp. on Theory of Computing},
  year      = {1996},
  pages     = {20--29},
  abstract  = {The frequency moments of a sequence containing $m_i$ elements of type
    $i$, for $1 \le i \le n$, are the numbers $F_k = \sum_{i=1}^{n} m_i^k$.
    We consider the space complexity of randomized algorithms that
    approximate the numbers $F_k$, when the elements of the sequence are
    given one by one and cannot be stored. Surprisingly, it turns out that
    the numbers $F_0$, $F_1$ and $F_2$ can be approximated in logarithmic
    space, whereas the approximation of $F_k$ for $k \ge 6$ requires
    $n^{\Omega(1)}$ space. Applications to data bases are...},
}
@techreport{SRI-CSL-95-06,
  author      = {Debra Anderson and Teresa F. Lunt and Harold Javitz and Ann Tamaru
    and Alfonso Valdes},
  title       = {Detecting Unusual Program Behavior Using the Statistical Component
    of the Next-generation Intrusion Detection Expert System ({NIDES})},
  institution = {Computer Science Laboratory, {SRI} International},
  year        = {1995},
  number      = {SRI-CSL-95-06},
  address     = {Menlo Park, CA},
  month       = may,
  url         = {http://www.csl.sri.com/papers/sri-csl-95-06/},
}
@inproceedings{Carson2004,
  author    = {Carson Andorf and Adrian Silvescu and Drena Dobbs and Vasant Honavar},
  title     = {Learning Classifiers for Assigning Protein Sequences to Gene Ontology
    Functional Families},
  booktitle = {Fifth International Conference on Knowledge Based Computer Systems
    (KBCS 2004)},
  year      = {2004},
  pages     = {256--265},
  location  = {Hyderabad, India},
}
@inproceedings{Anton2005,
  author    = {Tobias Anton},
  title     = {{XPath}-Wrapper Induction by generating tree traversal patterns},
  booktitle = {Lernen, Wissensentdeckung und Adaptivit{\"a}t (LWA) 2005, GI Workshops,
    Saarbr{\"u}cken},
  year      = {2005},
  pages     = {126--133},
  month     = oct,
  publisher = {DFKI},
  owner     = {dkkang},
  timestamp = {2006.06.12},
}
@inproceedings{APTE94b,
  author    = {Chidanand Apt{\'e} and Fred Damerau and Sholom M. Weiss},
  title     = {Towards language independent automated learning of text categorization
    models},
  booktitle = {SIGIR '94: Proceedings of the 17th annual international ACM SIGIR
    conference on Research and development in information retrieval},
  year      = {1994},
  pages     = {23--30},
  address   = {New York, NY, USA},
  publisher = {Springer-Verlag New York, Inc.},
  isbn      = {0-387-19889-X},
  location  = {Dublin, Ireland},
}
@book{arndt2001,
  author    = {Christoph Arndt},
  title     = {Information Measures},
  publisher = {Springer-Verlag Telos},
  year      = {2001},
}
@inproceedings{DBLP:conf/kdd/AronisP97,
  author    = {John M. Aronis and Foster J. Provost},
  title     = {Increasing the Efficiency of Data Mining Algorithms with Breadth-First
    Marker Propagation},
  booktitle = {Proceedings of the Third International Conference on Knowledge Discovery
    and Data Mining (KDD-97), Newport Beach, California, USA, August
    14--17, 1997},
  year      = {1997},
  editor    = {David Heckerman and Heikki Mannila and Daryl Pregibon},
  pages     = {119--122},
  publisher = {AAAI Press},
  isbn      = {1-57735-027-8},
}
@article{Ashburner2000,
  author  = {Ashburner, M. and Ball, C. A. and Blake, J. A. and Botstein, D. and
    Butler, H. and Cherry, J. M. and Davis, A. P. and Dolinski, K. and
    Dwight, S. S. and Eppig, J. T. and Harris, M. A. and Hill, D. P. and
    Issel-Tarver, L. and Kasarskis, A. and Lewis, S. and Matese, J. C.
    and Richardson, J. E. and Ringwald, M. and Rubin, G. M. and Sherlock,
    G.},
  title   = {Gene ontology: tool for the unification of biology},
  journal = {Nature Genetics},
  year    = {2000},
  volume  = {25},
  number  = {1},
  pages   = {25--29},
  note    = {The Gene Ontology Consortium},
}
@inproceedings{Ashish1997,
  author    = {Naveen Ashish and Craig Knoblock},
  title     = {Wrapper Generation for Semi-structured Internet Sources},
  booktitle = {Workshop on Management of Semistructured Data},
  year      = {1997},
  address   = {Tucson, Arizona},
}
@inproceedings{ILP03-Atramentov,
  author    = {Atramentov, A. and Leiva, H. and Honavar, V.},
  title     = {A Multi-Relational Decision Tree Learning Algorithm -- Implementation
    and Experiments},
  booktitle = {Inductive Logic Programming: 13th International Conference,
    {ILP} 2003, Szeged, Hungary, September 29--October 1, 2003, Proceedings},
  year      = {2003},
  editor    = {T.~Horv{\'a}th and A.~Yamamoto},
  volume    = {2835},
  series    = {LNAI},
  pages     = {38--56},
  publisher = {Springer-Verlag},
  isbn      = {3-540-20144-0},
}
@inproceedings{anna2003,
  author    = {Anna Atramentov and Vasant Honavar},
  title     = {Speeding Up Multi-Relational Data Mining},
  booktitle = {Workshop on Learning Statistical Models from Relational Data at 2003
    International Joint Conference on Artificial Intelligence (IJCAI
    2003)},
  year      = {2003},
}
@inproceedings{DBLP:conf/ilp/AtramentovLH03,
  author    = {Anna Atramentov and Hector Leiva and Vasant Honavar},
  title     = {A Multi-relational Decision Tree Learning Algorithm -- Implementation
    and Experiments},
  booktitle = {Inductive Logic Programming (ILP) : 13th International Conference,
    ILP 2003, Szeged, Hungary, September 29--October 1, 2003, Proceedings},
  year      = {2003},
  volume    = {2835},
  series    = {Lecture Notes in Computer Science},
  pages     = {38--56},
  publisher = {Springer},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  isbn      = {3-540-20144-0},
}
@techreport{Axelsson2000,
  author      = {Stefan Axelsson},
  title       = {Intrusion Detection Systems: A Survey and Taxonomy},
  institution = {Chalmers Univ.},
  year        = {2000},
  number      = {99-15},
  month       = mar,
  url         = {http://citeseer.nj.nec.com/axelsson00intrusion.html},
}
@article{Bach-y-Rita2003,
  author  = {Paul Bach-y-Rita and Stephen W. Kercel},
  title   = {Sensory Substitution and the Human-Machine Interface},
  journal = {Trends in Cognitive Sciences},
  year    = {2003},
  volume  = {7},
  number  = {12},
  pages   = {541--546},
  month   = dec,
}
@article{bairoch00swissprot,
  author       = {Amos Bairoch and Rolf Apweiler},
  title        = {The {SWISS-PROT} protein sequence database and its supplement {TrEMBL}
    in 2000},
  journal      = {Nucleic Acids Research},
  year         = {2000},
  volume       = {28},
  number       = {1},
  pages        = {45--48},
  howpublished = {\url{http://www.expasy.ch/enzyme/}},
  url          = {http://citeseer.ist.psu.edu/bairoch00swissprot.html},
}
@article{Bajcsy1976,
  author  = {R. Bajcsy and M. Tavakoli},
  title   = {Computer Recognition of Roads from Satellite Pictures},
  journal = {IEEE Transactions on Systems, Man and Cybernetics},
  year    = {1976},
  volume  = {6},
  number  = {9},
  pages   = {623--637},
}
@inproceedings{Baker1998,
  author    = {L. Douglas Baker and Andrew Kachites McCallum},
  title     = {Distributional clustering of words for text classification},
  booktitle = {Proceedings of the 21st annual international ACM SIGIR conference
    on Research and development in information retrieval},
  year      = {1998},
  pages     = {96--103},
  publisher = {ACM Press},
  doi       = {10.1145/290941.290970},
  isbn      = {1-58113-015-5},
  location  = {Melbourne, Australia},
}
@article{BaldiBCAN00,
  author  = {Pierre Baldi and S{\o}ren Brunak and Yves Chauvin and Claus A. F.
    Andersen and Henrik Nielsen},
  title   = {Assessing the accuracy of prediction algorithms for classification:
    an overview},
  journal = {Bioinformatics},
  year    = {2000},
  volume  = {16},
  number  = {5},
  pages   = {412--424},
}
@article{Barbara2002,
  author   = {Daniel Barbara},
  title    = {Requirements for clustering data streams},
  journal  = {ACM SIGKDD Explorations Newsletter},
  year     = {2002},
  volume   = {3},
  number   = {2},
  pages    = {23--27},
  abstract = {Scientific and industrial examples of data streams abound in astronomy,
    telecommunication operations, banking and stock-market applications,
    e-commerce and other fields. A challenge imposed by continuously
    arriving data streams is to analyze them and to modify the models
    that explain them as new data arrives. In this paper, we analyze
    the requirements needed for clustering data streams. We review some
    of the latest algorithms in the literature and assess if they meet
    these requirements.},
}
@article{Barnard2003,
  author   = {Kobus Barnard and Pinar Duygulu and David Forsyth and Nando de Freitas
    and David Blei and Michael Jordan},
  title    = {Matching Words and Pictures},
  journal  = {The Journal of Machine Learning Research},
  year     = {2003},
  volume   = {3},
  pages    = {1107--1135},
  abstract = {We present a new approach for modeling multi-modal data sets, focusing
    on the specific case of segmented images with associated text. Learning
    the joint distribution of image regions and words has many applications.
    We consider in detail predicting words associated with whole images
    (auto-annotation) and corresponding to particular image regions
    (region naming). Auto-annotation might help organize and access
    large collections of images. Region naming is a model of object
    recognition as a process of translating image regions to words,
    much as one might translate from one language to another. Learning
    the relationships between image regions and semantic correlates
    (words) is an interesting example of multi-modal data mining, particularly
    because it is typically hard to apply data mining techniques to
    collections of images. We develop a number of models for the joint
    distribution of image regions and words, including several which
    explicitly learn the correspondence between regions and words. We
    study multi-modal and correspondence extensions to Hofmann's hierarchical
    clustering/aspect model, a translation model adapted from statistical
    machine translation (Brown et al.), and a multi-modal extension
    to mixture of latent Dirichlet allocation (MoM-LDA). All models
    are assessed using a large collection of annotated images of real
    scenes. We study in depth the difficult problem of measuring performance.
    For the annotation task, we look at prediction performance on held
    out data. We present three alternative measures, oriented toward
    different types of task. Measuring the performance of correspondence
    methods is harder, because one must determine whether a word has
    been placed on the right region of an image. We can use annotation
    performance as a proxy measure, but accurate measurement requires
    hand labeled data, and thus must occur on a smaller scale. We show
    results using both an annotation proxy, and manually labeled data.},
}
@article{Barsalou1983,
  author  = {L. W. Barsalou},
  title   = {Ad hoc categories},
  journal = {Memory \& Cognition},
  year    = {1983},
  volume  = {11},
  number  = {3},
  pages   = {211--227},
}
@inproceedings{Beaudouin-Lafon2000,
  author    = {Michel Beaudouin-Lafon},
  title     = {Instrumental interaction: an interaction model for designing {post-WIMP}
    user interfaces},
  booktitle = {the SIGCHI conference on Human factors in computing systems},
  year      = {2000},
  pages     = {446--453},
  address   = {The Hague, The Netherlands},
  abstract  = {This article introduces a new interaction model called Instrumental
    Interaction that extends and generalizes the principles of direct
    manipulation. It covers existing interaction styles, including traditional
    WIMP interfaces, as well as new interaction styles such as two-handed
    input and augmented reality. It defines a design space for new interaction
    techniques and a set of properties for comparing them. Instrumental
    Interaction describes graphical user interfaces in terms of domain
    objects and interaction instruments. Interaction between users and
    domain objects is mediated by interaction instruments, similar to
    the tools and instruments we use in the real world to interact with
    physical objects. The article presents the model, applies it to
    describe and compare a number of interaction techniques, and shows
    how it was used to create a new interface for searching and replacing
    text.},
}
@inproceedings{Bekkerman2001,
  author    = {Ron Bekkerman and Ran El-Yaniv and Naftali Tishby and Yoad Winter},
  title     = {On feature distributional clustering for text categorization},
  booktitle = {the 24th annual international ACM SIGIR conference on Research and
    development in information retrieval},
  year      = {2001},
  pages     = {146--153},
  address   = {New Orleans, Louisiana, United States},
  abstract  = {We describe a text categorization approach that is based on a combination
    of feature distributional clusters with a support vector machine
    (SVM) classifier. Our feature selection approach employs distributional
    clustering of words via the recently introduced information bottleneck
    method, which generates a more efficient word-cluster representation
    of documents. Combined with the classification power of an SVM,
    this method yields high performance text categorization that can
    outperform other recent methods in terms of categorization accuracy
    and representation efficiency. Comparing the accuracy of our method
    with other techniques, we observe significant dependency of the
    results on the data set. We discuss the potential reasons for this
    dependency.},
}
@book{Bergin2004,
  author    = {Joseph Bergin and Mark Stehlik and Jim Roberts and Richard Pattis},
  title     = {Karel The Robot: A Gentle Introduction to the Art of Programming},
  publisher = {Wiley},
  year      = {1994},
  edition   = {Second},
  url       = {http://csis.pace.edu/~bergin/KarelJava2ed/karelexperimental.html},
  owner     = {dkkang},
  timestamp = {2005.12.20},
}
@article{BernersLee2001,
  author  = {Berners-Lee, Tim and Hendler, James and Lassila, Ora},
  title   = {The {Semantic Web}},
  journal = {Scientific American},
  year    = {2001},
  volume  = {284},
  number  = {5},
  pages   = {34--43},
  month   = may,
  url     = {http://www.sciam.com/article.cfm?articleID=00048144-10D2-1C70-84A9809EC588EF21},
}
@inproceedings{Bernstein2003,
  author    = {Daniel S. Bernstein and Zhengzhu Feng and Brian Neil Levine and Shlomo
    Zilberstein},
  title     = {Adaptive Peer Selection},
  booktitle = {the 2nd International Workshop on Peer-to-Peer Systems (IPTPS)},
  year      = {2003},
  address   = {Berkeley, California},
}
@book{Bishop1996,
  author    = {Christopher M. Bishop},
  title     = {Neural networks for pattern recognition},
  publisher = {Oxford University Press},
  year      = {1996},
  isbn      = {0-19-853849-9},
}
@inproceedings{bishop95standard,
  author    = {Matt Bishop},
  title     = {A Standard Audit Trail Format},
  booktitle = {Proceedings of 18th {NIST}-{NCSC} National Information Systems Security
    Conference},
  year      = {1995},
  pages     = {136--145},
  url       = {http://citeseer.ist.psu.edu/bishop95standard.html},
}
@misc{Blake+Merz:1998,
  author      = {C. L. Blake and C. J. Merz},
  title       = {{UCI} Repository of machine learning databases},
  year        = {1998},
  institution = {University of California, Irvine, Dept. of Information and Computer
    Sciences},
  url         = {http://www.ics.uci.edu/~mlearn/MLRepository.html},
}
@article{Bloom1970,
  author   = {Burton Bloom},
  title    = {Space/time trade-offs in hash coding with allowable errors},
  journal  = {Communications of the ACM},
  year     = {1970},
  volume   = {13},
  number   = {7},
  pages    = {422--426},
  abstract = {In this paper trade-offs among certain computational factors in hash
    coding are analyzed. The paradigm problem considered is that of
    testing a series of messages one-by-one for membership in a given
    set of messages. Two new hash-coding methods are examined and compared
    with a particular conventional hash-coding method. The computational
    factors considered are the size of the hash area (space), the time
    required to identify a message as a nonmember of the given set (reject
    time), and an allowable error frequency. The new methods are intended
    to reduce the amount of space required to contain the hash-coded
    information from that associated with conventional methods. The
    reduction in space is accomplished by exploiting the possibility
    that a small fraction of errors of commission may be tolerable in
    some applications, in particular, applications in which a large
    amount of data is involved and a core resident hash area is consequently
    not feasible using conventional methods. In such applications, it
    is envisaged that overall performance could be improved by using
    a smaller core resident hash area in conjunction with the new methods
    and, when necessary, by using some secondary and perhaps time-consuming
    test to ``catch'' the small fraction of errors associated with the
    new methods. An example is discussed which illustrates possible
    areas of application for the new methods. Analysis of the paradigm
    problem demonstrates that allowing a small number of test messages
    to be falsely identified as members of the given set will permit
    a much smaller hash area to be used without increasing reject time.},
}
@inproceedings{Blum1990,
  author    = {Avrim Blum},
  title     = {Learning boolean functions in an infinite attribute space},
  booktitle = {the twenty-second annual ACM symposium on Theory of computing},
  year      = {1990},
  pages     = {64--72},
  address   = {Baltimore, Maryland, United States},
  publisher = {ACM Press, New York, NY, USA},
}
@inproceedings{Blum1994,
  author    = {Avrim Blum and Merrick Furst and Jeffrey Jackson and Michael Kearns
    and Yishay Mansour},
  title     = {Weakly Learning {DNF} and Characterizing Statistical Query Learning
    Using {Fourier} Analysis},
  booktitle = {the 26th ACM Symposium on the Theory of Computing},
  year      = {1994},
  pages     = {253--262},
  address   = {New York, NY},
  publisher = {ACM Press},
}
@article{Blumer1989,
  author  = {Anselm Blumer and Andrzej Ehrenfeucht and David Haussler and Manfred
    K. Warmuth},
  title   = {Learnability and the {Vapnik--Chervonenkis} dimension},
  journal = {Journal of the ACM},
  year    = {1989},
  volume  = {36},
  number  = {4},
  pages   = {929--965},
}
@inproceedings{Board1990,
  author    = {Raymond Board and Leonard Pitt},
  title     = {On the necessity of {Occam} algorithms},
  booktitle = {the Twenty Second Annual ACM Symposium on Theory of Computing},
  year      = {1990},
  pages     = {54--63},
  address   = {Baltimore, Maryland},
}
@book{Borenstein1996,
  author    = {J. Borenstein and H. R. Everett and Liqiang Feng},
  title     = {Navigating Mobile Robots: Systems and Techniques},
  publisher = {AK Peters, Ltd.},
  year      = {1996},
  isbn      = {156881058X},
}
@inproceedings{ILP99-Bostrom-Asker,
  author    = {H. Bostr{\"o}m and L. Asker},
  title     = {Combining Divide-and-Conquer and Separate-and-Conquer for Efficient
    and Effective Rule Induction},
  booktitle = {Proceedings of the 9th International Workshop on Inductive Logic
    Programming (ILP99)},
  year      = {1999},
  editor    = {S. D{\v{z}}eroski and P. Flach},
  volume    = {1634},
  series    = {Lecture Notes in Artificial Intelligence (LNAI)},
  pages     = {33--43},
  publisher = {Springer-Verlag},
  isbn      = {3-540-66109-3},
}
@inproceedings{Bowling2003,
  author    = {Michael Bowling and Michael Littman},
  title     = {Multiagent Learning: A Game Theoretic Perspective},
  booktitle = {The 2003 International Joint Conference on Artificial Intelligence},
  year      = {2003},
  note      = {Tutorial},
}
@book{Braitenberg1986,
  author    = {Valentino Braitenberg},
  title     = {Vehicles: Experiments in Synthetic Psychology},
  publisher = {The MIT Press},
  year      = {1986},
  month     = feb,
  note      = {Reprint edition},
  isbn      = {0262521121},
}
@article{Brin1998,
  author  = {Sergey Brin and Lawrence Page},
  title   = {The Anatomy of a Large-Scale Hypertextual {Web} Search Engine},
  journal = {Computer Networks and ISDN Systems},
  year    = {1998},
  volume  = {30},
  number  = {1--7},
  pages   = {107--117},
}
@book{Brooks2002,
  author    = {Rodney Brooks},
  title     = {Flesh and Machines: How Robots Will Change Us},
  publisher = {Pantheon},
  year      = {2002},
  edition   = {First},
  month     = feb,
  isbn      = {0375420797},
}
@book{Brooks1999,
  author    = {Rodney Brooks},
  title     = {Cambrian Intelligence: The Early History of the New {AI}},
  publisher = {The MIT Press},
  year      = {1999},
  month     = jul,
  isbn      = {0262522632},
}
@techreport{brown94vision,
  author      = {Christopher M. Brown},
  title       = {Vision, Learning, and Development},
  institution = {The University of Rochester, Computer Science Department},
  year        = {1994},
  number      = {TR492},
  month       = feb,
}
@comment{NOTE(review): Brown2000 -- the last three listed authors (Kearns,
  Littlestone, Warmuth) do not appear to belong to this paper; verify the
  author list against the published article before citing.}
@article{Brown2000,
  author   = {Michael P. S. Brown and William Noble Grundy and David Lin and Nello
    Cristianini and Charles Sugnet and Terrence S. Furey and Ares, Jr., Manuel
    and David Haussler and Michael Kearns and Nick Littlestone
    and Manfred K. Warmuth},
  title    = {Knowledge-based Analysis of Microarray Gene Expression Data Using
    Support Vector Machines},
  journal  = {Proceedings of the National Academy of Sciences},
  year     = {2000},
  volume   = {97},
  pages    = {262--267},
  abstract = {We introduce a new method of functionally classifying genes using
    gene expression data from DNA microarray hybridization experiments.
    The method is based on the theory of support vector machines. SVMs
    are considered a supervised computer learning method because they
    exploit prior knowledge of gene function to identify unknown genes
    of similar function from expression data. SVMs avoid several problems
    associated with unsupervised clustering methods such as hierarchical
    clustering methods and self organizing maps. SVMs have many mathematical
    features that make them attractive for gene expression analysis,
    including their flexibility in choosing a similarity function, sparseness
    of solution when dealing with large data sets, the ability to handle
    large feature spaces, and the ability to identify outliers. We test
    several SVMs that use different similarity metrics, as well as some
    other supervised learning methods, and find that the SVMs best identify
    sets of genes with a common function using expression data. Finally,
    we use SVMs to predict functional roles for uncharacterized yeast
    ORFs based on their expression data.},
}
@inproceedings{Buja2001,
  author    = {Andreas Buja and Yung-Seop Lee},
  title     = {Data mining criteria for tree-based regression and classification},
  booktitle = {the seventh ACM SIGKDD international conference on Knowledge discovery
    and data mining},
  year      = {2001},
  pages     = {27--36},
  address   = {San Francisco, California},
  abstract  = {This paper is concerned with the construction of regression and classification
    trees that are more adapted to data mining applications than conventional
    trees. To this end, we propose new splitting criteria for growing
    trees. Conventional splitting criteria attempt to perform well on
    both sides of a split by attempting a compromise in the quality
    of fit between the left and the right side. By contrast, we adopt
    a data mining point of view by proposing criteria that search for
    interesting subsets of the data, as opposed to modeling all of the
    data equally well. The new criteria do not split based on a compromise
    between the left and the right bucket; they effectively pick the
    more interesting bucket and ignore the other. As expected, the result
    is often a simpler characterization of interesting subsets of the
    data. Less expected is that the new criteria often yield whole trees
    that provide more interpretable data descriptions. Surprisingly,
    it is a "flaw" that works to their advantage: The new criteria have
    an increased tendency to accept splits near the boundaries of the
    predictor ranges. This so-called "end-cut problem" leads to the
    repeated peeling of small layers of data and results in very unbalanced
    but highly expressive and interpretable trees.},
}
@article{Burbea1982a,
  author  = {Burbea, J. and Rao, C. R.},
  title   = {Entropy Differential Metric, Distance and Divergence Measures in
    Probability Spaces: A Unified Approach},
  journal = {Journal of Multivariate Analysis},
  year    = {1982},
  volume  = {12},
  pages   = {575--596},
}
@article{Burbea1982b,
  author  = {Burbea, J. and Rao, C. R.},
  title   = {On the Convexity of Some Divergence Measures Based on Entropy Functions},
  journal = {IEEE Transactions on Information Theory},
  year    = {1982},
  volume  = {IT-28},
  pages   = {489--495},
}
@book{Burnham02,
  author    = {Kenneth P. Burnham and David Anderson},
  title     = {Model Selection and Multi-Model Inference},
  publisher = {Springer},
  year      = {2002},
  edition   = {Second},
  month     = jul,
}
@inproceedings{Byrd1999,
  author    = {Donald Byrd},
  title     = {A Scrollbar-based Visualization for Document Navigation},
  booktitle = {the Fourth ACM International Conference on Digital Libraries},
  year      = {1999},
  address   = {Berkeley, CA},
}
@article{DBLP:journals/candc/CaiLC02,
  author    = {Yu-Dong Cai and Xiao-Jun Liu and Kuo-Chen Chou},
  title     = {Artificial Neural Network Model for Predicting Protein Subcellular
    Location},
  journal   = {Computers \& Chemistry},
  year      = {2002},
  volume    = {26},
  number    = {2},
  pages     = {179--182},
  doi       = {10.1016/S0097-8485(01)00106-1},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee        = {http://dx.doi.org/10.1016/S0097-8485(01)00106-1},
}
@article{Cancedda2003,
  author  = {Nicola Cancedda and Eric Gaussier and Cyril Goutte and Jean Michel
    Renders},
  title   = {Word sequence kernels},
  journal = {The Journal of Machine Learning Research},
  year    = {2003},
  volume  = {3},
  pages   = {1059--1082},
  note    = {Special issue},
}
@article{Capelle1998,
  author  = {C. Capelle and C. Trullemans and P. Arno and C. Veraart},
  title   = {A Real-Time Experimental Prototype for Enhancement of Vision Rehabilitation
    Using Auditory Substitution},
  journal = {IEEE Transactions on Biomedical Engineering},
  year    = {1998},
  volume  = {45},
  pages   = {1279--1293},
  month   = oct,
}
@inproceedings{CarageaRSH03,
  author    = {Doina Caragea and Jaime Reinoso and Adrian Silvescu and Vasant Honavar},
  title     = {Statistics Gathering for Learning from Distributed, Heterogeneous
    and Autonomous Data Sources},
  booktitle = {Proceedings of IJCAI-03 Workshop on Information Integration on the
    Web (IIWeb-03), August 9--10, 2003, Acapulco, Mexico},
  year      = {2003},
  pages     = {99--104},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee        = {http://www.isi.edu/info-agents/workshops/ijcai03/papers/caragea1.pdf},
}
@incollection{Caragea2001,
  author    = {Doina Caragea and Adrian Silvescu and Vasant Honavar},
  title     = {Towards a Theoretical Framework for Analysis and Synthesis of Agents
    That Learn from Distributed Dynamic Data Sources},
  booktitle = {Emerging Neural Architectures Based on Neuroscience},
  publisher = {Springer-Verlag},
  year      = {2001},
  address   = {Berlin},
  note      = {Invited Chapter},
}
@article{Cessie1992,
  author  = {le Cessie, S. and van Houwelingen, J. C.},
  title   = {Ridge Estimators in Logistic Regression},
  journal = {Applied Statistics},
  year    = {1992},
  volume  = {41},
  number  = {1},
  pages   = {191--201},
}
@techreport{Chaturvedi2005,
  author      = {Abhishek Chaturvedi and Sandeep Bhatkar and R. Sekar},
  title       = {Improving Attack Detection in Host-Based {IDS} by Learning Properties
    of System Call Arguments},
  institution = {Department of Computer Science, Stony Brook University},
  year        = {2005},
  number      = {SECLAB-05-03},
  month       = jul,
  owner       = {dkkang},
  timestamp   = {2006.01.18},
}
@inproceedings{Cho2002,
  author    = {J. Cho and H. Garcia-Molina},
  title     = {Parallel Crawlers},
  booktitle = {11th International World-Wide Web Conference},
  year      = {2002},
}
@inproceedings{Cimiano2004,
  author    = {Philipp Cimiano and Andreas Hotho and Steffen Staab},
  title     = {Comparing Conceptual, Partitional and Agglomerative Clustering for
    Learning Taxonomies from Text},
  booktitle = {Proceedings of the European Conference on Artificial Intelligence
    (ECAI'04)},
  year      = {2004},
  url       = {http://www.aifb.uni-karlsruhe.de/WBS/pci/ecai04.pdf},
}
@inproceedings{Cimiano2003,
  author    = {Philipp Cimiano and Steffen Staab and Julien Tane},
  title     = {Automatic Acquisition of Taxonomies from Text: {FCA} meets {NLP}},
  booktitle = {Proceedings of the ECML/PKDD Workshop on Adaptive Text Extraction
    and Mining, Cavtat--Dubrovnik, Croatia},
  year      = {2003},
  pages     = {10--17},
  url       = {http://www.aifb.uni-karlsruhe.de/WBS/pci/ontolearning.pdf},
}
@book{Clark2003,
  author    = {Andy Clark},
  title     = {Natural-Born Cyborgs: Minds, Technologies, and the Future of Human
    Intelligence},
  publisher = {Oxford University Press},
  year      = {2004},
}
@inproceedings{Cohen1998,
  author    = {William W. Cohen},
  title     = {A {Web}-based Information System that Reasons with Structured Collections
    of Text},
  booktitle = {the 2nd International Conference on Autonomous Agents (Agents'98)},
  year      = {1998},
  pages     = {400--407},
  address   = {New York},
  abstract  = {The degree to which information sources are pre-processed by Web-based
    information systems varies greatly. In search engines like Altavista,
    little pre-processing is done, while in ``knowledge integration''
    systems, complex site-specific ``wrappers'' are used to integrate different
    information sources into a common database representation. In this
    paper we describe an intermediate between these two models. In our
    system, information sources are converted into a highly structured
    collection of small...},
}
@inproceedings{cohen95fast,
  author    = {William W. Cohen},
  title     = {Fast Effective Rule Induction},
  booktitle = {Proc. of the 12th International Conference on Machine Learning},
  year      = {1995},
  editor    = {Armand Prieditis and Stuart Russell},
  pages     = {115--123},
  address   = {Tahoe City, CA},
  month     = jul,
  publisher = {Morgan Kaufmann},
  isbn      = {1-55860-377-8},
  url       = {http://citeseer.nj.nec.com/cohen95fast.html},
}
@inproceedings{Collins2001,
  author    = {Michael Collins and Sanjoy Dasgupta and Robert E. Schapire},
  title     = {A Generalization of Principal Component Analysis to the Exponential
    Family},
  booktitle = {NIPS},
  year      = {2001},
  abstract  = {Principal component analysis (PCA) is a commonly applied technique
    for dimensionality reduction. PCA implicitly minimizes a squared
    loss function, which may be inappropriate for data that is not real-valued,
    such as binary-valued data. This paper draws on ideas from the Exponential
    family, Generalized linear models, and Bregman distances, to give
    a generalization of PCA to loss functions that we argue are better
    suited to other data types. We describe algorithms for minimizing
    the loss...},
}
@article{Cortes-Vapnik,
  author    = {Corinna Cortes and Vladimir Vapnik},
  title     = {Support-Vector Networks},
  journal   = {Machine Learning},
  year      = {1995},
  volume    = {20},
  number    = {3},
  pages     = {273--297},
  issn      = {0885-6125},
  publisher = {Kluwer Academic Publishers},
}
@INPROCEEDINGS{Coull2003,
AUTHOR = {Scott Coull and Joel Branch and Boleslaw Szymanski and Eric Breimer},
TITLE = {Intrusion Detection: A Bioinformatics Approach},
BOOKTITLE = {19th Annual Computer Security Applications Conference},
YEAR = {2003},
ADDRESS = {Las Vegas, Nevada},
ABSTRACT = {This paper addresses the problem of detecting masquerading, a security
attack in which an intruder assumes the identity of a legitimate
user. Many approaches based on Hidden Markov Models and various
forms of Finite State Automata were proposed to solve this problem.
The novelty of our approach results from application of techniques
used in bioinformatics for a pair-wise sequence alignment to compare
the monitored session with the past user behavior. Our algorithm
uses a semi-global alignment and a unique scoring system to measure
similarity between a sequence of commands produced by a potential
intruder and the user signature, which is a sequence of commands
collected from a legitimate user. We tested this algorithm on the
standard intrusion data collection set. As discussed in the paper,
the results of the test showed that the described algorithm yields
a promising combination of intrusion detection rate and false positive
rate, when compared to the published intrusion detection algorithms.},
KEYWORDS = {Intrusion detection, sequence alignment, bioinformatics, masquerade
detection, pattern matching}
}
@BOOK{Crain1991,
TITLE = {Theories of Development: Concepts and Applications},
PUBLISHER = {Prentice Hall},
YEAR = {1991},
AUTHOR = {William Crain},
EDITION = {Third},
MONTH = nov,
ISBN = {013913476X}
}
@INBOOK{Crain1991Chap6,
CHAPTER = {Piaget's Cognitive-Developmental Theory},
TITLE = {Theories of Development: Concepts and Applications},
PUBLISHER = {Prentice Hall},
YEAR = {1991},
AUTHOR = {William Crain},
EDITION = {Third},
MONTH = nov,
ISBN = {013913476X}
}
@INPROCEEDINGS{Cristianini2001,
AUTHOR = {Nello Cristianini and John Shawe-Taylor and Huma Lodhi},
TITLE = {Latent Semantic Kernels},
BOOKTITLE = {the Eighteenth International Conference on Machine Learning},
YEAR = {2001},
PAGES = {66--73}
}
@INPROCEEDINGS{Cumby2003,
AUTHOR = {Chad Cumby and Dan Roth},
TITLE = {On Kernel Methods for Relational Learning},
BOOKTITLE = {ICML 2003},
YEAR = {2003},
PAGES = {107--114},
ABSTRACT = {Kernel methods have gained a great deal of popularity in the machine
learning community as a method to learn indirectly in high-dimensional
feature spaces. Those interested in relational learning have recently
begun to cast learning from structured and relational data in terms
of kernel operations. We describe a general family of kernel functions
built up from a description language of limited expressivity and
use it to study the benefits and drawbacks of kernel learning in
relational domains. Learning with kernels in this family directly
models learning over an expanded feature space constructed using
the same description language. This allows us to examine issues
of time complexity in terms of learning with these and other relational
kernels, and how these relate to generalization ability. The tradeoffs
between using kernels in a very high dimensional implicit space
versus a restricted feature space, is highlighted through two experiments,
in bioinformatics and in natural language processing.}
}
@INPROCEEDINGS{DzeSchHei96-ILP96,
AUTHOR = {D{\v{z}}eroski, S. and Schulze-Kremer, S. and Heidtke, K. R. and Siems,
K. and Wettschereck, D.},
TITLE = {Applying {ILP} to Diterpene Structure Elucidation from $^{13}${C}
{NMR} Spectra},
BOOKTITLE = {Proceedings of the 6th International Workshop on Inductive Logic
Programming (ILP96)},
YEAR = {1996},
EDITOR = {Muggleton, S.},
VOLUME = {1314},
SERIES = {Lecture Notes in Artificial Intelligence (LNAI)},
PAGES = {41--54},
PUBLISHER = {Springer-Verlag}
}
@INPROCEEDINGS{Darwiche2002,
AUTHOR = {Adnan Darwiche},
TITLE = {A Logical Approach for Factoring Belief Networks},
BOOKTITLE = {KR 2002},
YEAR = {2002},
PAGES = {409--420},
ABSTRACT = {We have shown recently that a belief network can be represented as
a polynomial and that many probabilistic queries can be recovered
in constant time from the partial derivatives of such a polynomial.
Although this polynomial is exponential in size, we have shown that
it can be "computed" using an arithmetic circuit whose size is not
necessarily exponential. Hence, the key computational question becomes
that of generating the smallest arithmetic circuit that computes
the network...}
}
@INPROCEEDINGS{Darwiche2000,
AUTHOR = {Adnan Darwiche},
TITLE = {A Differential Approach to Inference in Bayesian Networks},
BOOKTITLE = {Uncertainty in Artificial Intelligence},
YEAR = {2000},
ABSTRACT = {We present a new approach for inference in Bayesian networks, which
is mainly based on partial differentiation. According to this approach,
one compiles a Bayesian network into a multivariate polynomial and
then computes the partial derivatives of this polynomial with respect
to each variable. We show that once such derivatives are made available,
one can compute in constant-time answers to a large class of probabilistic
queries, which are central to classical inference, parameter estimation,...}
}
@ARTICLE{Debnath1991,
AUTHOR = {A. K. Debnath and Lopez de Compadre, R. L. and G. Debnath and A. J. Shusterman
and C. Hansch},
TITLE = {Structure-Activity Relationship of Mutagenic Aromatic and Heteroaromatic
Nitro Compounds. Correlation with Molecular Orbital Energies and
Hydrophobicity},
JOURNAL = {Journal of Medicinal Chemistry},
YEAR = {1991},
VOLUME = {34},
PAGES = {786--797},
NUMBER = {2},
MONTH = feb
}
@INPROCEEDINGS{Dechter1997,
AUTHOR = {Rina Dechter},
TITLE = {Mini-Buckets: A General Scheme For Generating Approximations In Automated
Reasoning},
BOOKTITLE = {Fifteenth International Joint Conference of Artificial Intelligence
(IJCAI97)},
YEAR = {1997},
ADDRESS = {Japan}
}
@INPROCEEDINGS{Dechter1996,
AUTHOR = {Rina Dechter},
TITLE = {Bucket elimination: A unifying framework for probabilistic inference},
BOOKTITLE = {Twelfth Conf. on Uncertainty in Artificial Intelligence},
YEAR = {1996},
PAGES = {211--219},
ABSTRACT = {Probabilistic inference algorithms for finding the most probable explanation,
the maximum aposteriori hypothesis, and the maximum expected utility
and for updating belief are reformulated as an elimination--type
algorithm called bucket elimination. This emphasizes the principle
common to many of the algorithms appearing in that literature and
clarifies their relationship to nonserial dynamic programming algorithms.
We also present a general way of combining conditioning and elimination
within...}
}
@ARTICLE{Deerwester1990,
AUTHOR = {Scott Deerwester and Susan T. Dumais and George W. Furnas and Thomas
K. Landauer and Richard Harshman},
TITLE = {Indexing by Latent Semantic Analysis},
JOURNAL = {Journal of the American Society for Information Science},
YEAR = {1990},
VOLUME = {41},
PAGES = {391--407},
NUMBER = {6},
ABSTRACT = {A new method for automatic indexing and retrieval is described. The
approach is to take advantage of implicit higher-order structure
in the association of terms with documents ("semantic structure")
in order to improve the detection of relevant documents on the basis
of terms found in queries. The particular technique used is singular-value
decomposition, in which a large term by document matrix is decomposed
into a set of ca 100 orthogonal factors from which the original
matrix can be...}
}
@INPROCEEDINGS{Demmer1998,
AUTHOR = {Michael J. Demmer and Maurice P. Herlihy},
TITLE = {The Arrow Distributed Directory Protocol},
BOOKTITLE = {12th International Symposium on Distributed Computing},
YEAR = {1998},
PAGES = {119--133},
ADDRESS = {Greece},
ABSTRACT = {Most practical techniques for locating remote objects in a distributed
system suffer from problems of scalability and locality of reference.
We have devised the Arrow distributed directory protocol, a scalable
and local mechanism for ensuring mutually exclusive access to mobile
objects. This directory has communication complexity optimal within
a factor of (1 + MST-stretch(G))/2, where MST-stretch(G) is the ``minimum
spanning tree stretch'' of the underlying network.}
}
@BOOK{Denneberg1994,
TITLE = {Non-additive Measure and Integral},
PUBLISHER = {Kluwer Academic Publishers},
YEAR = {1994},
AUTHOR = {D. Denneberg},
ADDRESS = {Dordrecht}
}
@ARTICLE{Denning1987,
AUTHOR = {Dorothy E. Denning},
TITLE = {An intrusion-detection model},
JOURNAL = {IEEE Trans. Softw. Eng.},
YEAR = {1987},
VOLUME = {13},
PAGES = {222--232},
NUMBER = {2},
ISSN = {0098-5589},
PUBLISHER = {IEEE Press}
}
@INPROCEEDINGS{desJardins2000,
AUTHOR = {Marie desJardins and Lise Getoor and Daphne Koller},
TITLE = {Using Feature Hierarchies in Bayesian Network Learning},
BOOKTITLE = {SARA '00: Proceedings of the 4th International Symposium on Abstraction,
Reformulation, and Approximation},
YEAR = {2000},
PAGES = {260--270},
PUBLISHER = {Springer-Verlag},
ISBN = {3-540-67839-5}
}
@ARTICLE{Dhar1993,
AUTHOR = {V. Dhar and A. Tuzhilin},
TITLE = {Abstract-Driven Pattern Discovery in Databases},
JOURNAL = {IEEE Transactions on Knowledge and Data Engineering},
YEAR = {1993},
VOLUME = {5},
PAGES = {926--938},
NUMBER = {6},
DOI = {10.1109/69.250075},
ISSN = {1041-4347},
PUBLISHER = {IEEE Educational Activities Department}
}
@INPROCEEDINGS{Dhillon2001,
AUTHOR = {Inderjit S. Dhillon},
TITLE = {Co-clustering documents and words using bipartite spectral graph
partitioning},
BOOKTITLE = {Knowledge Discovery and Data Mining},
YEAR = {2001},
PAGES = {269--274}
}
@INPROCEEDINGS{Dickerson2001,
AUTHOR = {John E. Dickerson and Jukka Juslin and Ourania Koukousoula and Julie
A. Dickerson},
TITLE = {Fuzzy intrusion detection},
BOOKTITLE = {IFSA World Congress and 20th North American Fuzzy Information Processing
Society (NAFIPS) International Conference},
YEAR = {2001},
PAGES = {1506--1510},
ADDRESS = {Vancouver, British Columbia}
}
@INPROCEEDINGS{Dissanayake2000,
AUTHOR = {M. W. M. G. Dissanayake and P. Newman and Hugh F. Durrant-Whyte and
Steve Clark and M. Csorba},
TITLE = {An Experimental and Theoretical Investigation into Simultaneous Localisation
and Map Building},
BOOKTITLE = {The Sixth International Symposium on Experimental Robotics VI},
YEAR = {2000},
PAGES = {265--274},
ADDRESS = {London, UK},
PUBLISHER = {Springer-Verlag},
ISBN = {1-85233-210-7}
}
@INPROCEEDINGS{Doan2002,
AUTHOR = {AnHai Doan and Jayant Madhavan and Pedro Domingos and Alon Halevy},
TITLE = {Learning to Map between Ontologies on the Semantic Web},
BOOKTITLE = {the eleventh international conference on World Wide Web},
YEAR = {2002},
ADDRESS = {Honolulu, Hawaii, USA},
ABSTRACT = {Ontologies play a prominent role on the Semantic Web. They make possible
the widespread publication of machine understandable data, opening
myriad opportunities for automated information processing. However,
because of the Semantic Web's distributed nature, data on it will
inevitably come from many different ontologies. Information processing
across ontologies is not possible without knowing the semantic mappings
between their elements. Manually finding such mappings is tedious,
error-prone, and clearly not possible at the Web scale. Hence, the
development of tools to assist in the ontology mapping process is
crucial to the success of the Semantic Web. We describe glue, a system
that employs machine learning techniques to find such mappings.
Given two ontologies, for each concept in one ontology glue finds
the most similar concept in the other ontology. We give well-founded
probabilistic definitions to several practical similarity measures,
and show that glue can work with all of them. This is in contrast
to most existing approaches, which deal with a single similarity
measure. Another key feature of glue is that it uses multiple learning
strategies, each of which exploits a different type of information
either in the data instances or in the taxonomic structure of the
ontologies. To further improve matching accuracy, we extend glue
to incorporate commonsense knowledge and domain constraints into
the matching process. For this purpose, we show that relaxation
labeling, a well-known constraint optimization technique used in
computer vision and other fields, can be adapted to work efficiently
in our context. Our approach is thus distinguished in that it works
with a variety of well-defined similarity notions and that it efficiently
incorporates multiple types of knowledge. We describe a set of experiments
on several real-world domains, and show that glue proposes highly
accurate semantic mappings.}
}
@INPROCEEDINGS{Domingos1998,
AUTHOR = {Pedro Domingos},
TITLE = {Occam's two razors: the sharp and the blunt},
BOOKTITLE = {Proc. 4th Int. Conf. on Knowledge Discovery and Data Mining},
YEAR = {1998},
PAGES = {37--43},
PUBLISHER = {AAAI Press}
}
@ARTICLE{domingos97optimality,
AUTHOR = {Pedro Domingos and Michael J. Pazzani},
TITLE = {On the Optimality of the Simple Bayesian Classifier under Zero-One
Loss},
JOURNAL = {Machine Learning},
YEAR = {1997},
VOLUME = {29},
PAGES = {103--130},
NUMBER = {2--3}
}
@INPROCEEDINGS{domingos96beyond,
AUTHOR = {Pedro Domingos and Michael J. Pazzani},
TITLE = {Beyond Independence: Conditions for the Optimality of the Simple
Bayesian Classifier},
BOOKTITLE = {International Conference on Machine Learning},
YEAR = {1996},
PAGES = {105--112},
URL = {http://citeseer.ist.psu.edu/domingos96beyond.html}
}
@INPROCEEDINGS{Donlon1999,
AUTHOR = {J. Donlon and K. Forbus},
TITLE = {Using a geographic information system for qualitative spatial reasoning
about trafficability},
BOOKTITLE = {Proceedings of the Qualitative Reasoning Workshop},
YEAR = {1999},
ADDRESS = {Loch Awe, Scotland}
}
@INPROCEEDINGS{Doorenbos1997,
AUTHOR = {Robert B. Doorenbos and Oren Etzioni and Daniel S. Weld},
TITLE = {A scalable comparison-shopping agent for the World-Wide Web},
BOOKTITLE = {the first international conference on Autonomous agents},
YEAR = {1997},
PAGES = {39--48},
ADDRESS = {Marina del Rey, California}
}
@INPROCEEDINGS{Druschel2002,
AUTHOR = {Peter Druschel and Sitaram Iyer and Antony Rowstron},
TITLE = {Squirrel: A decentralized peer to peer web cache},
BOOKTITLE = {PODC 2002},
YEAR = {2002}
}
@BOOK{Duda2000,
TITLE = {Pattern Classification},
PUBLISHER = {Wiley-Interscience},
YEAR = {2000},
AUTHOR = {Richard O. Duda and Peter E. Hart and David G. Stork},
EDITION = {Second},
ISBN = {0471056693}
}
@INPROCEEDINGS{Dumais1998,
AUTHOR = {Susan Dumais and John Platt and David Heckerman and Mehran Sahami},
TITLE = {Inductive learning algorithms and representations for text categorization},
BOOKTITLE = {CIKM '98: Proceedings of the seventh international conference on
Information and knowledge management},
YEAR = {1998},
PAGES = {148--155},
PUBLISHER = {ACM Press},
DOI = {10.1145/288627.288651},
ISBN = {1-58113-061-9},
LOCATION = {Bethesda, Maryland, United States}
}
@INPROCEEDINGS{Dzeroski1998,
AUTHOR = {Sa{\v{s}}o D{\v{z}}eroski and De Raedt, Luc and Hendrik Blockeel},
TITLE = {Relational reinforcement learning},
BOOKTITLE = {International Workshop on Inductive Logic Programming},
YEAR = {1998},
PAGES = {136--143},
ADDRESS = {Madison, WI},
ABSTRACT = {Relational reinforcement learning is presented, a learning technique
that combines reinforcement learning with relational learning or
inductive logic programming. Due to the use of a more expressive
representation language to represent states, actions and Q-functions,
relational reinforcement learning can be potentially applied to
a new range of learning tasks. One such task that we investigate
is planning in the blocks world, where it is assumed that the effects
of the actions are ...}
}
@INPROCEEDINGS{Easterlin1985,
AUTHOR = {J.D. Easterlin and Pat Langley},
TITLE = {A framework for concept formation},
BOOKTITLE = {Proceedings of the Seventh Conference of the Cognitive Science Society},
YEAR = {1985},
PAGES = {267--271},
ADDRESS = {Irvine, CA, USA}
}
@TECHREPORT{Endler2004,
AUTHOR = {David Endler},
TITLE = {Intrusion Detection using Solaris' Basic Security Module},
INSTITUTION = {TechTarget, Inc.},
YEAR = {2004},
ADDRESS = {Needham, MA},
MONTH = {July},
OWNER = {dkkang},
TIMESTAMP = {2006.01.18},
URL = {http://www.securityfocus.com/print/infocus/1211}
}
@INPROCEEDINGS{Engelson1992,
AUTHOR = {S. Engelson and D. McDermott},
TITLE = {Error correction in mobile robot map learning},
BOOKTITLE = {Proceedings of the IEEE International Conference on Robotics \& Automation
(ICRA)},
YEAR = {1992}
}
@INPROCEEDINGS{Eskin2000,
AUTHOR = {Eleazar Eskin},
TITLE = {Anomaly Detection over Noisy Data using Learned Probability Distributions},
BOOKTITLE = {the 2000 International Conference on Machine Learning (ICML-2000)},
YEAR = {2000},
ADDRESS = {Palo Alto, CA},
ABSTRACT = {Traditional anomaly detection techniques focus on detecting anomalies
in new data after training on normal (or clean) data. In this paper
we present a technique for detecting anomalies without training
on normal data. We present a method for detecting anomalies within
a data set that contains a large number of normal elements and relatively
few anomalies. We present a mixture model for explaining the presence
of anomalies in the data. Motivated by the model, the approach uses
machine learning techniques to estimate a probability distribution
over the data and applies a statistical test to detect the anomalies.
The anomaly detection technique is applied to intrusion detection
by examining intrusions manifested as anomalies in UNIX system call
traces.}
}
@ARTICLE{Eskin2002,
AUTHOR = {Eleazar Eskin and Andrew Arnold and Michael Prerau and Leonid Portnoy
and Salvatore Stolfo},
TITLE = {A Geometric Framework for Unsupervised Anomaly Detection: Detecting
Intrusions in Unlabeled Data},
JOURNAL = {Data Mining for Security Applications},
YEAR = {2002}
}
@ARTICLE{Estivill-Castro2002,
AUTHOR = {Vladimir Estivill-Castro},
TITLE = {Why so many clustering algorithms: a position paper},
JOURNAL = {SIGKDD Explorations},
YEAR = {2002},
VOLUME = {4},
PAGES = {65--75},
NUMBER = {1}
}
@INPROCEEDINGS{Eyheramendy2003,
AUTHOR = {Susana Eyheramendy and David D. Lewis and David Madigan},
TITLE = {On the Naive Bayes Model for Text Categorization},
BOOKTITLE = {Ninth International Workshop on Artificial Intelligence and Statistics},
YEAR = {2003}
}
@TECHREPORT{Fang1997,
AUTHOR = {Weiwu Fang},
TITLE = {FDOD Function and the Information Discrepancy Contained in Multiple
Probability Distributions},
INSTITUTION = {DIMACS Center, Rutgers University},
YEAR = {1997},
NUMBER = {DIMACS TR: 97-36},
ABSTRACT = {The concept of Shannon information has played a significant role in
a variety of scientific and engineering areas. The question naturally
arises: how can we measure information discrepancy contained in
two or more probability distributions? The answer to this problem
will be very interesting in both theory and practice. Some measures
for the cases of two or three distributions have been presented by the
pioneers, but these measures have some disadvantages; moreover,
there doesn't exist a measure for $n$ distributions so far. A FDOD
function with many good properties has been introduced in the study
of information discrepancy of judgments of multiple experts ( FW
1994). In this paper, based on the ideas concerned with Shannon
information and measures of difference, we propose an axiom set
for measuring the information discrepancy contained in a group of
distributions, and prove that the only function satisfying the axiom
set is of the FDOD form. The final results and even the intermediate
results indeed show the close connection of the FDOD function with
Shannon information and the measures of difference in statistics.}
}
@TECHREPORT{Fawcett2003,
AUTHOR = {Tom Fawcett},
TITLE = {{ROC} graphs: Notes and practical considerations for researchers},
INSTITUTION = {HP Labs},
YEAR = {2003},
NUMBER = {HPL-2003-4}
}
@ARTICLE{Feigenbaum2001,
AUTHOR = {Joan Feigenbaum and Christos H. Papadimitriou and Scott Shenker},
TITLE = {Sharing the Cost of Multicast Transmissions},
JOURNAL = {Journal of Computer and System Sciences},
YEAR = {2001},
VOLUME = {63},
PAGES = {21--41},
NUMBER = {1}
}
@ARTICLE{Firestone1996,
AUTHOR = {L. Firestone and S. Rupert and J. Olson and W. Mueller},
TITLE = {Automated Feature Extraction: The Key to Future Productivity},
JOURNAL = {Photogrammetric Engineering and Remote Sensing},
YEAR = {1996},
VOLUME = {62},
PAGES = {671--674},
NUMBER = {6}
}
@INPROCEEDINGS{Flach2003,
AUTHOR = {Peter A. Flach},
TITLE = {The Geometry of {ROC} Space: Understanding Machine Learning Metrics
through {ROC} Isometrics},
BOOKTITLE = {the 20th International Conference on Machine Learning (ICML 2003)},
YEAR = {2003},
PAGES = {194--201},
PUBLISHER = {AAAI Press},
ABSTRACT = {Many different metrics are used in machine learning and data mining
to build and evaluate models. However, there is no general theory
of machine learning metrics, that could answer questions such as:
When we simultaneously want to optimise two criteria, how can or
should they be traded off? Some metrics are inherently independent
of class and misclassification cost distributions, while other are
not -- can this be made more precise? This paper provides a derivation
of ROC space from first principles through 3D ROC space and the
skew ratio, and redefines metrics in these dimensions. The paper
demonstrates that the graphical depiction of machine learning metrics
by means of ROC isometrics gives many useful insights into the characteristics
of these metrics, and provides a foundation on which a theory of
machine learning metrics can be built.}
}
@ARTICLE{Flach2004,
AUTHOR = {Peter Flach and Nicolas Lachiche},
TITLE = {Naive Bayesian Classification of Structured Data},
JOURNAL = {Machine Learning},
YEAR = {2004},
VOLUME = {57},
PAGES = {233--269}
}
@INPROCEEDINGS{Forrest1996,
AUTHOR = {Stephanie Forrest and Steven A. Hofmeyr and Anil Somayaji and Thomas
A. Longstaff},
TITLE = {A Sense of Self for Unix Processes},
BOOKTITLE = {Proceedings of the 1996 IEEE Symposium on Security and Privacy},
YEAR = {1996},
PAGES = {120--128},
PUBLISHER = {IEEE Computer Society},
ISBN = {0-8186-7417-2}
}
@ARTICLE{Freund1997,
AUTHOR = {Yoav Freund and Robert E. Schapire},
TITLE = {A decision-theoretic generalization of on-line learning and an application
to boosting},
JOURNAL = {Journal of Computer and System Sciences},
YEAR = {1997},
VOLUME = {55},
PAGES = {119--139},
NUMBER = {1}
}
@INPROCEEDINGS{freund96experiments,
AUTHOR = {Yoav Freund and Robert E. Schapire},
TITLE = {Experiments with a New Boosting Algorithm},
BOOKTITLE = {International Conference on Machine Learning},
YEAR = {1996},
PAGES = {148--156},
URL = {http://citeseer.ist.psu.edu/freund96experiments.html}
}
@INPROCEEDINGS{Friedman1998,
AUTHOR = {Nir Friedman},
TITLE = {The Bayesian Structural EM Algorithm},
BOOKTITLE = {Fourteenth Conf. on Uncertainty in Artificial Intelligence (UAI 98)},
YEAR = {1998},
ABSTRACT = {In recent years there has been a flurry of works on learning Bayesian
networks from data. One of the hard problems in this area is how
to effectively learn the structure of a belief network from incomplete
data---that is, in the presence of missing values or hidden variables.
In a recent paper, I introduced an algorithm called Structural EM
that combines the standard Expectation Maximization (EM) algorithm,
which optimizes parameters, with structure search for model selection.
That algorithm learns networks based on penalized likelihood scores,
which include the BIC/MDL score and various approximations to the
Bayesian score. In this paper, I extend Structural EM to deal directly
with Bayesian model selection. I prove the convergence of the resulting
algorithm and show how to apply it for learning a large class of
probabilistic models, including Bayesian networks and some variants
thereof.}
}
@ARTICLE{Friedman1997,
AUTHOR = {Nir Friedman and Dan Geiger and Moises Goldszmidt},
TITLE = {Bayesian Network Classifiers},
JOURNAL = {Machine Learning},
YEAR = {1997},
VOLUME = {29},
PAGES = {131--163},
NUMBER = {2--3},
ISSN = {0885-6125},
PUBLISHER = {Kluwer Academic Publishers}
}
@INPROCEEDINGS{DBLP:conf/ijcai/FriedmanGKP99,
AUTHOR = {Nir Friedman and Lise Getoor and Daphne Koller and Avi Pfeffer},
TITLE = {Learning Probabilistic Relational Models},
BOOKTITLE = {Proceedings of the Sixteenth International Joint Conference on Artificial
Intelligence ({IJCAI} 99)},
YEAR = {1999},
EDITOR = {Thomas Dean},
PAGES = {1300--1309},
ADDRESS = {Stockholm, Sweden},
PUBLISHER = {Morgan Kaufmann},
ISBN = {1-55860-613-0}
}
@INPROCEEDINGS{Friedman1996,
AUTHOR = {Nir Friedman and Moises Goldszmidt},
TITLE = {Building Classifiers using Bayesian Networks},
BOOKTITLE = {AAAI/IAAI},
YEAR = {1996},
VOLUME = {2},
PAGES = {1277--1284}
}
@INPROCEEDINGS{Friedman2001,
AUTHOR = {Nir Friedman and Daphne Koller},
TITLE = {Learning Bayesian Networks From Data},
BOOKTITLE = {NIPS 2001},
YEAR = {2001},
NOTE = {Tutorial}
}
@ARTICLE{Friedman2002,
AUTHOR = {Nir Friedman and Matan Ninio and Itsik Pe'er and Tal Pupko},
TITLE = {A Structural EM Algorithm for Phylogenetic Inference},
JOURNAL = {Journal of Computational Biology},
YEAR = {2002},
VOLUME = {9},
PAGES = {331--353},
ABSTRACT = {A central task in the study of molecular evolution is the reconstruction
of a phylogenetic tree from sequences of current-day taxa. The most
established approach to tree reconstruction is maximum likelihood
(ML) analysis. Unfortunately, searching for the maximum likelihood
phylogenetic tree is computationally prohibitive for large data
sets. In this paper, we describe a new algorithm that uses Structural
EM for learning maximum likelihood phylogenetic trees. This algorithm
is similar to the standard EM method for edge-length estimation,
except that during iterations of the Structural EM algorithm the
topology is improved as well as the edge length. Our algorithm performs
iterations of two steps. In the E-Step, we use the current tree
topology and edge lengths to compute expected sufficient statistics,
which summarize the data. In the M-Step, we search for a topology
that maximizes the likelihood with respect to these expected sufficient
statistics. We show that searching for better topologies inside
the M-step can be done efficiently, as opposed to standard methods
for topology search. We prove that each iteration of this procedure
increases the likelihood of the topology, and thus the procedure
must converge. This convergence point, however, can be a sub-optimal
one. To escape from such ``local optima,'' we further enhance our basic
EM procedure by incorporating moves in the flavor of simulated annealing.
We evaluate these new algorithms on both synthetic and real sequence
data, and show that for protein sequences even our basic algorithm
finds more plausible trees than existing methods for searching maximum
likelihood phylogenies. Furthermore, our algorithms are dramatically
faster than such methods, enabling, for the first time, phylogenetic
analysis of large protein data sets in the maximum likelihood framework.}
}
@ARTICLE{Fua1996,
AUTHOR = {P. Fua},
TITLE = {Model-based Optimization: Accurate and Consistent Site Modeling},
JOURNAL = {International Archives for Photogrammetry and Remote Sensing},
YEAR = {1996},
VOLUME = {31},
PAGES = {222--233},
NUMBER = {B3},
PUBLISHER = {Plenum Press}
}
@INPROCEEDINGS{Forstner1987,
AUTHOR = {W. F{\"o}rstner and E. G{\"u}lch},
TITLE = {A Fast Operator for Detection and Precise Location of Distinct Points,
Corners and Centers of Circular Features},
BOOKTITLE = {Proceedings ISPRS Intercommission Workshop on Fast Processing of
Photogrammetric Data},
YEAR = {1987},
ADDRESS = {Interlaken},
MONTH = jun
}
@INCOLLECTION{Gallistel1999,
AUTHOR = {Charles R. Gallistel},
TITLE = {Coordinate transformations in the genesis of directed action},
BOOKTITLE = {Cognitive Science},
PUBLISHER = {Academic Press},
YEAR = {1999},
EDITOR = {Benjamin Bly and David Rumelhart},
PAGES = {1--42},
ADDRESS = {New York},
OWNER = {dkkang},
TIMESTAMP = {2005.11.23}
}
@INPROCEEDINGS{gama98,
AUTHOR = {Jo{\~a}o Gama},
TITLE = {Local Cascade Generalization},
BOOKTITLE = {ICML '98: Proceedings of the Fifteenth International Conference on
Machine Learning},
YEAR = {1998},
PAGES = {206--214},
ADDRESS = {San Francisco, CA, USA},
PUBLISHER = {Morgan Kaufmann Publishers Inc.},
ISBN = {1-55860-556-8}
}
@ARTICLE{gama00,
AUTHOR = {Jo{\~a}o Gama and Pavel Brazdil},
TITLE = {Cascade Generalization},
JOURNAL = {Machine Learning},
YEAR = {2000},
VOLUME = {41},
PAGES = {315--343},
NUMBER = {3}
}
@INPROCEEDINGS{Ganesan2003,
AUTHOR = {Prasanna Ganesan and Qixiang Sun and Hector Garcia-Molina},
TITLE = {YAPPERS: A Peer-to-Peer Lookup Service over Arbitrary Topology},
BOOKTITLE = {IEEE INFOCOM},
YEAR = {2003},
ABSTRACT = {Existing peer-to-peer search networks generally fall into two categories:
Gnutella-style systems that use arbitrary topology and rely on controlled
flooding for search, and systems that explicitly build an underlying
topology to efficiently support a distributed hash table (DHT).
In this paper, we propose a hybrid scheme for building a peer-to-peer
lookup service over arbitrary network topology. Specifically, for
each node in the search network, we build a small DHT consisting
of nearby nodes...}
}
@INPROCEEDINGS{Ganti1999,
AUTHOR = {Venkatesh Ganti and Johannes Gehrke and Raghu Ramakrishnan},
TITLE = {CACTUS - clustering categorical data using summaries},
BOOKTITLE = {Proceedings of the fifth ACM SIGKDD international conference on Knowledge
discovery and data mining},
YEAR = {1999},
PAGES = {73--83},
PUBLISHER = {ACM Press},
DOI = {10.1145/312129.312201},
ISBN = {1-58113-143-7},
LOCATION = {San Diego, California, United States}
}
@ARTICLE{Garofalakis2003,
AUTHOR = {Minos Garofalakis and Aristides Gionis and Rajeev Rastogi and S. Seshadri
and Kyuseok Shim},
TITLE = {XTRACT: Learning Document Type Descriptors from XML Document Collections},
JOURNAL = {Data Mining and Knowledge Discovery},
YEAR = {2003},
VOLUME = {7},
PAGES = {23--56}
}
@INPROCEEDINGS{Gerkey2003,
AUTHOR = {Brian P. Gerkey and Richard T. Vaughan and Andrew Howard},
TITLE = {The Player/Stage Project: Tools for Multi-Robot and Distributed Sensor
Systems},
BOOKTITLE = {Proceedings of the International Conference on Advanced Robotics
(ICAR)},
YEAR = {2003},
PAGES = {317--323},
ADDRESS = {Coimbra, Portugal},
MONTH = jul,
OWNER = {DK},
TIMESTAMP = {2006.03.06}
}
@ARTICLE{Getoor2002,
AUTHOR = {Lise Getoor and Nir Friedman and Daphne Koller and Benjamin Taskar},
TITLE = {Learning Probabilistic Models of Link Structure},
JOURNAL = {Journal of Machine Learning Research},
YEAR = {2002},
VOLUME = {3},
PAGES = {679--707},
NUMBER = {SPECIAL ISSUE},
ABSTRACT = {Most real-world data is heterogeneous and richly interconnected. Examples
include the Web, hypertext, bibliometric data and social networks.
In contrast, most statistical learning methods work with ``flat'' data
representations, forcing us to convert our data into a form that
loses much of the link structure. The recently introduced framework
of probabilistic relational models (PRMs) embraces the object-relational
nature of structured data by capturing probabilistic interactions
between attributes of related entities. In this paper, we extend
this framework by modeling interactions between the attributes and
the link structure itself. An advantage of our approach is a unified
generative model for both content and relational structure. We propose
two mechanisms for representing a probabilistic distribution over
link structures: reference uncertainty and existence uncertainty.
We describe the appropriate conditions for using each model and
present learning algorithms for each. We present experimental results
showing that the learned models can be used to predict link structure
and, moreover, the observed link structure can be used to provide
better predictions for the attributes in the model.},
KEYWORDS = {Probabilistic Relational Models, Bayesian Networks, Relational Learning}
}
@INPROCEEDINGS{Getoor2001,
AUTHOR = {Getoor, Lise and Friedman, Nir and Koller, Daphne and Taskar, Benjamin},
TITLE = {Learning Probabilistic Models of Relational Structure},
BOOKTITLE = {ICML '01: Proceedings of the Eighteenth International Conference on
Machine Learning},
PUBLISHER = {Morgan Kaufmann Publishers Inc.},
ADDRESS = {San Francisco, CA, USA},
PAGES = {170--177},
YEAR = {2001},
ISBN = {1-55860-778-1}
}
@INPROCEEDINGS{Ghosh1999,
AUTHOR = {Anup Ghosh and Aaron Schwartzbard},
TITLE = {A study in using neural networks for anomaly and misuse detection},
BOOKTITLE = {8th USENIX Security Symposium},
YEAR = {1999},
PAGES = {141--151},
ADDRESS = {Washington, D.C.}
}
% Gibson1998 duplicates gibson00clustering (same VLDB Journal article, vol. 8,
% no. 3--4, published 2000); the key is kept so existing citations still resolve.
@ARTICLE{Gibson1998,
AUTHOR = {David Gibson and Jon Kleinberg and Prabhakar Raghavan},
TITLE = {Clustering Categorical Data: An Approach Based on Dynamical Systems},
JOURNAL = {VLDB Journal: Very Large Data Bases},
YEAR = {2000},
VOLUME = {8},
PAGES = {222--236},
NUMBER = {3--4}
}
@ARTICLE{gibson00clustering,
AUTHOR = {David Gibson and Jon M. Kleinberg and Prabhakar Raghavan},
TITLE = {Clustering Categorical Data: An Approach Based on Dynamical Systems},
JOURNAL = {VLDB Journal: Very Large Data Bases},
YEAR = {2000},
VOLUME = {8},
PAGES = {222--236},
NUMBER = {3--4},
URL = {http://citeseer.ist.psu.edu/article/gibson98clustering.html}
}
@ARTICLE{Gibson1988,
AUTHOR = {Gibson, Eleanor},
TITLE = {Exploratory behavior in the development of perceiving, acting, and the
acquiring of knowledge},
JOURNAL = {Annual Review of Psychology},
VOLUME = {39},
PAGES = {1--41},
YEAR = {1988}
}
@BOOK{Gibson1979,
TITLE = {The ecological approach to visual perception},
PUBLISHER = {Lawrence Erlbaum Associates},
YEAR = {1979},
AUTHOR = {James J. Gibson},
ISBN = {0-89859-959-8}
}
@INCOLLECTION{Gibson1977,
AUTHOR = {James J. Gibson},
TITLE = {The Theory of Affordances},
BOOKTITLE = {Perceiving, Acting, and Knowing},
PUBLISHER = {Lawrence Erlbaum},
ADDRESS = {Hillsdale, NJ},
YEAR = {1977},
EDITOR = {R. E. Shaw and J. Bransford}
}
@INPROCEEDINGS{Giles1998,
AUTHOR = {C. Lee Giles and Kurt D. Bollacker and Steve Lawrence},
TITLE = {{CiteSeer}: An Automatic Citation Indexing System},
BOOKTITLE = {Digital Libraries 98 - Third ACM Conference on Digital Libraries},
YEAR = {1998},
PAGES = {89--98},
ABSTRACT = {We present CiteSeer: an autonomous citation indexing system which
indexes academic literature in electronic format (e.g. Postscript
files on the Web). CiteSeer understands how to parse citations,
identify citations to the same paper in different formats, and identify
the context of citations in the body of articles. CiteSeer provides
most of the advantages of traditional (manually constructed) citation
indexes (e.g. the ISI citation indexes), including: literature retrieval
by following citation links (e.g. by providing a list of papers
that cite a given paper), the evaluation and ranking of papers,
authors, journals, etc. based on the number of citations, and the
identification of research trends. CiteSeer has many advantages
over traditional citation indexes, including the ability to create
more up-to-date databases which are not limited to a preselected
set of journals or restricted by journal publication delays, completely
autonomous operation with a corresponding reduction in cost, and
powerful interactive browsing of the literature using the context
of citations. Given a particular paper of interest, CiteSeer can
display the context of how the paper is cited in subsequent publications.
This context may contain a brief summary of the paper, another author's
response to the paper, or subsequent work which builds upon the
original article. CiteSeer allows the location of papers by keyword
search or by citation links. Papers related to a given paper can
be located using common citation information or word vector similarity.
CiteSeer will soon be available for public use.}
}
@INPROCEEDINGS{Goldman2000,
AUTHOR = {Goldman, Roy and Widom, Jennifer},
TITLE = {WSQ/DSQ: A Practical Approach for Combined Querying of Databases and
the Web},
BOOKTITLE = {the ACM SIGMOD Int. Conf. on Management of Data},
ADDRESS = {Dallas, US},
PAGES = {285--296},
YEAR = {2000},
ABSTRACT = {We present WSQ/DSQ (pronounced "wisk-disk"), a new approach for combining
the query facilities of traditional databases with existing search
engines on the Web. WSQ, for Web-Supported (Database) Queries, leverages
results from Web searches to enhance SQL queries over a relational
database. DSQ, for Database-Supported (Web) Queries, uses information
stored in the database to enhance and explain Web searches. This
paper focuses primarily on WSQ, describing a simple, low-overhead
way to...}
}
@INPROCEEDINGS{Gonzales2001,
AUTHOR = {Gonz{\'a}les, L.},
TITLE = {Universal Aggregation Operators},
BOOKTITLE = {EusFlat'2001},
ADDRESS = {Leicester},
YEAR = {2001}
}
@ARTICLE{Grunwald2003,
AUTHOR = {Peter D. Gr{\"u}nwald and Joseph Y. Halpern},
TITLE = {Updating Probabilities},
JOURNAL = {Journal of Artificial Intelligence Research (JAIR)},
YEAR = {2003},
VOLUME = {19},
PAGES = {243--278},
ABSTRACT = {As examples such as the Monty Hall puzzle show, applying conditioning
to update a probability distribution on a ``naive space'', which
does not take into account the protocol used, can often lead to
counterintuitive results. Here we examine why. A criterion known
as CAR (``coarsening at random'') in the statistical literature
characterizes when ``naive'' conditioning in a naive space works.
We show that the CAR condition holds rather infrequently, and we
provide a procedural characterization of it, by giving a randomized
algorithm that generates all and only distributions for which CAR
holds. This substantially extends previous characterizations of
CAR. We also consider more generalized notions of update such as
Jeffrey conditioning and minimizing relative entropy (MRE). We give
a generalization of the CAR condition that characterizes when Jeffrey
conditioning leads to appropriate answers, and show that there exist
some very simple settings in which MRE essentially never gives the
right results. This generalizes and interconnects previous results
obtained in the literature on CAR and MRE.}
}
@INPROCEEDINGS{Guha1998,
AUTHOR = {Guha, Sudipto and Rastogi, Rajeev and Shim, Kyuseok},
TITLE = {CURE: An Efficient Clustering Algorithm for Large Databases},
BOOKTITLE = {ACM SIGMOD International Conference on Management of Data},
PAGES = {73--84},
YEAR = {1998},
ABSTRACT = {Clustering, in data mining, is useful for discovering groups and identifying
interesting distributions in the underlying data. Traditional clustering
algorithms either favor clusters with spherical shapes and similar
sizes, or are very fragile in the presence of outliers. We propose
a new clustering algorithm called CURE that is more robust to outliers,
and identifies clusters having non-spherical shapes and wide variances
in size. CURE achieves this by representing each cluster by a certain...}
}
@INPROCEEDINGS{Kayacik2003,
AUTHOR = {Gunes Kayacik and Nur Zincir-Heywood and Malcolm Heywood},
TITLE = {On the Capability of an {SOM} based Intrusion Detection System},
BOOKTITLE = {The IEEE International Joint Conference on Neural Networks, IJCNN03},
YEAR = {2003}
}
@BOOK{Gusfield1997,
TITLE = {Algorithms on Strings, Trees, and Sequences: Computer Science and
Computational Biology},
PUBLISHER = {Cambridge University Press},
YEAR = {1997},
AUTHOR = {Dan Gusfield},
EDITION = {First},
OWNER = {dkkang},
TIMESTAMP = {2006.05.26}
}
@INPROCEEDINGS{Hammer1997,
AUTHOR = {Joachim Hammer and Hector Garcia-Molina and Junghoo Cho and Arturo
Crespo and Rohan Aranha},
TITLE = {Extracting Semistructured Information from the Web},
BOOKTITLE = {the Workshop on Management of Semistructured Data},
YEAR = {1997}
}
@INPROCEEDINGS{Hammer1997sigmod,
AUTHOR = {Hammer, Joachim and Garcia-Molina, Hector and Nestorov, Svetlozar and
Yerneni, Ramana and Breunig, Marcus and Vassalos, Vasilis},
TITLE = {Template-based wrappers in the TSIMMIS system},
BOOKTITLE = {Twenty-Third ACM SIGMOD International Conference on Management of Data},
ADDRESS = {Tucson, Arizona},
YEAR = {1997}
}
@INCOLLECTION{han96exploration,
AUTHOR = {Jiawei Han and Yongjian Fu},
TITLE = {Exploration of the Power of Attribute-Oriented Induction in Data
Mining},
BOOKTITLE = {Advances in Knowledge Discovery and Data Mining},
PUBLISHER = {AAAI Press/MIT Press},
YEAR = {1996},
EDITOR = {Usama M. Fayyad and Gregory Piatetsky-Shapiro and Padhraic Smyth and
Ramasamy Uthurusamy},
ISBN = {0-262-56097-6},
URL = {http://citeseer.ist.psu.edu/han96exploration.html}
}
@ARTICLE{harnad90theSymbol,
AUTHOR = {Harnad, S.},
TITLE = {The Symbol Grounding Problem},
JOURNAL = {Physica D: Nonlinear Phenomena},
VOLUME = {42},
PAGES = {335--346},
YEAR = {1990},
URL = {http://www.isrl.uiuc.edu/~amag/langev/paper/harnad90theSymbol.html}
}
@ARTICLE{Hart1968,
AUTHOR = {P. E. Hart and N. J. Nilsson and B. Raphael},
TITLE = {A Formal Basis for the Heuristic Determination of Minimum Cost Paths},
JOURNAL = {IEEE Transactions on Systems Science and Cybernetics (SSC)},
YEAR = {1968},
VOLUME = {4},
PAGES = {100--107},
NUMBER = {2}
}
@INPROCEEDINGS{Harvey2003,
AUTHOR = {Nicholas J. A. Harvey and Michael B. Jones and Stefan Saroiu and
Marvin Theimer and Alec Wolman},
TITLE = {{SkipNet}: A scalable overlay network with practical locality properties},
BOOKTITLE = {the Fourth USENIX Symposium on Internet Technologies and Systems
(USITS '03)},
YEAR = {2003},
ADDRESS = {Seattle, WA},
ABSTRACT = {Scalable overlay networks such as Chord, CAN, Pastry, and Tapestry
have recently emerged as flexible infrastructure for building large
peer-to-peer systems. In practice, such systems have two disadvantages:
They provide no control over where data is stored and no guarantee
that routing paths remain within an administrative domain whenever
possible. SkipNet is a scalable overlay network that provides controlled
data placement and guaranteed routing locality by organizing data
primarily by string names. SkipNet allows for both fine-grained
and coarse-grained control over data placement: Content can be placed
either on a pre-determined node or distributed uniformly across
the nodes of a hierarchical naming subtree. An additional useful
consequence of SkipNet's locality properties is that partition failures,
in which an entire organization disconnects from the rest of the
system, can result in two disjoint, but well-connected overlay networks.},
KEYWORDS = {Peer-to-Peer, Scalable, Locality, Self-Configuring, Range Query, Distributed
System}
}
@ARTICLE{haussler1988,
AUTHOR = {D. Haussler},
TITLE = {Quantifying inductive bias: {AI} learning algorithms and {Valiant's}
learning framework},
JOURNAL = {Artificial Intelligence},
YEAR = {1988},
VOLUME = {36},
PAGES = {177--221}
}
@ARTICLE{Haussler1992,
AUTHOR = {David Haussler},
TITLE = {Decision Theoretic Generalizations of the {PAC} Model for Neural Net
and Other Learning Applications},
JOURNAL = {Information and Computation},
YEAR = {1992},
VOLUME = {100},
PAGES = {78--150}
}
@INPROCEEDINGS{Haussler1991,
AUTHOR = {David Haussler and Michael Kearns and Robert Schapire},
TITLE = {Bounds on the Sample Complexity of {Bayesian} Learning Using Information
Theory and the {VC} Dimension},
BOOKTITLE = {the fourth annual workshop on Computational learning theory},
YEAR = {1991},
PAGES = {61--74},
ADDRESS = {Santa Cruz, California, United States},
ABSTRACT = {In this paper we study a Bayesian or average-case model of concept
learning with a twofold goal: to provide more precise characterizations
of learning curve (sample complexity) behavior that depend on properties
of both the prior distribution over concepts and the sequence of
instances seen by the learner, and to smoothly unite in a common
framework the popular statistical physics and VC dimension theories
of learning curves. To achieve this, we undertake a systematic investigation
and...}
}
@INPROCEEDINGS{Haveliwala2002,
AUTHOR = {Haveliwala, Taher H.},
TITLE = {Topic-Sensitive PageRank},
BOOKTITLE = {the Eleventh International World Wide Web Conference},
YEAR = {2002}
}
@TECHREPORT{Haveliwala1999,
AUTHOR = {Taher H. Haveliwala},
TITLE = {Efficient Computation of {PageRank}},
INSTITUTION = {Stanford University},
YEAR = {1999},
NUMBER = {1999-31},
ABSTRACT = {This paper discusses efficient techniques for computing PageRank,
a ranking metric for hypertext documents. We show that PageRank
can be computed for very large subgraphs of the web (up to hundreds
of millions of nodes) on machines with limited main memory. Running-time
measurements on various memory configurations are presented for
PageRank computation over the 24-million-page Stanford WebBase archive.
We discuss several methods for analyzing the convergence of PageRank
based on the induced ordering of the pages. We present convergence
results helpful for determining the number of iterations necessary
to achieve a useful PageRank assignment, both in the absence and
presence of search queries.}
}
@BOOK{Hawkins2004,
TITLE = {On Intelligence},
PUBLISHER = {Times Books},
YEAR = {2004},
AUTHOR = {Jeff Hawkins and Sandra Blakeslee},
ISBN = {0-8050-7456-2}
}
@INPROCEEDINGS{Hearst1995,
AUTHOR = {Marti A. Hearst},
TITLE = {{TileBars}: Visualization of Term Distribution Information in Full
Text Information Access},
BOOKTITLE = {Proceedings of the Conference on Human Factors in Computing Systems,
{CHI}'95},
YEAR = {1995},
ADDRESS = {Denver, CO},
URL = {http://citeseer.ist.psu.edu/hearst95tilebars.html}
}
@ARTICLE{Hearst2002,
AUTHOR = {Marti Hearst and Ame Elliott and Jennifer English and Rashmi Sinha
and Kirsten Swearingen and Ka-Ping Yee},
TITLE = {Finding the flow in web site search},
JOURNAL = {Communications of the ACM},
YEAR = {2002},
VOLUME = {45},
PAGES = {42--49},
NUMBER = {9},
ABSTRACT = {Designing a search system and interface may best be served (and executed)
by scrutinizing usability studies.}
}
@INPROCEEDINGS{Heller2003,
AUTHOR = {Katherine A. Heller and Krysta M. Svore and Angelos D. Keromytis and
Salvatore J. Stolfo},
TITLE = {One Class Support Vector Machines for Detecting Anomalous {Windows}
Registry Accesses},
BOOKTITLE = {The 3rd IEEE Conference Data Mining Workshop on Data Mining for Computer
Security},
YEAR = {2003},
ADDRESS = {Florida}
}
@ARTICLE{Helmer2003,
AUTHOR = {Guy Helmer and Johnny Wong and Vasant Honavar and Les Miller},
TITLE = {Lightweight Agents for Intrusion Detection},
JOURNAL = {Journal of Systems and Software},
YEAR = {2003},
VOLUME = {67},
PAGES = {109--122}
}
@INPROCEEDINGS{Helmer1999,
AUTHOR = {Helmer, Guy and Wong, Johnny and Honavar, Vasant and Miller, Les},
TITLE = {Data-Driven Induction of Compact Predictive Rules for Intrusion Detection from
System Log Data},
BOOKTITLE = {the Conference on Genetic and Evolutionary Computation (GECCO 99)},
ADDRESS = {Orlando, Florida},
YEAR = {1999}
}
@INPROCEEDINGS{Helmer2001,
AUTHOR = {Helmer, Guy and Wong, Johnny and Slagell, Mark and Honavar, Vasant and
Miller, Les and Lutz, Robyn},
TITLE = {A Software Fault Tree Approach to Requirement Analysis of an Intrusion Detection
System},
BOOKTITLE = {Symposium on Requirements Engineering for Information Security},
YEAR = {2001}
}
@INPROCEEDINGS{Helmer1998,
AUTHOR = {Guy Helmer and Johnny S. K. Wong and Vasant Honavar and Les Miller},
TITLE = {Intelligent Agents for Intrusion Detection},
BOOKTITLE = {IEEE Information Technology Conference},
YEAR = {1998},
PAGES = {121--124},
ADDRESS = {Syracuse, NY},
ABSTRACT = {This paper focuses on intrusion detection and countermeasures with
respect to widely-used operating systems and networks. The design
and architecture of an intrusion detection system built from distributed
agents is proposed to implement an intelligent system on which data
mining can be performed to provide global, temporal views of an
entire networked system. A starting point for agent intelligence
in our system is the research into the use of machine learning over
system call traces from the...}
}
@ARTICLE{Helmer2002,
AUTHOR = {Guy Helmer and Johnny S. K. Wong and Vasant G. Honavar and Les Miller},
TITLE = {Automated discovery of concise predictive rules for intrusion detection},
JOURNAL = {J. Syst. Softw.},
YEAR = {2002},
VOLUME = {60},
PAGES = {165--175},
NUMBER = {3},
DOI = {10.1016/S0164-1212(01)00088-7},
ISSN = {0164-1212},
PUBLISHER = {Elsevier Science Inc.}
}
@TECHREPORT{Hendler1996,
AUTHOR = {J. Hendler and K. Stoffel and M. Taylor},
TITLE = {Advances in High Performance Knowledge Representation},
INSTITUTION = {University of Maryland Institute for Advanced Computer Studies
Dept. of Computer Science},
NUMBER = {CS-TR-3672},
YEAR = {1996}
}
@ARTICLE{Hipp2000,
AUTHOR = {Jochen Hipp and Ulrich Guntzer and Gholamreza Nakhaeizadeh},
TITLE = {Algorithms for Association Rule Mining --- A General Survey and Comparison},
JOURNAL = {SIGKDD Explorations},
YEAR = {2000},
ABSTRACT = {Today there are several efficient algorithms that cope with the popular
and computationally expensive task of association rule mining. Actually,
these algorithms are more or less described on their own. In this
paper we explain the fundamentals of association rule mining and
moreover derive a general framework. Based on this we describe today's
approaches in context by pointing out common aspects and differences.
After that we thoroughly investigate their strengths and weaknesses
and carry out...}
}
@ARTICLE{Hofmann2001,
AUTHOR = {Thomas Hofmann},
TITLE = {Unsupervised Learning by Probabilistic Latent Semantic Analysis},
JOURNAL = {Machine Learning},
YEAR = {2001},
VOLUME = {42},
PAGES = {177--196},
ABSTRACT = {This paper presents a novel statistical method for factor analysis
of binary and count data which is closely related to a technique
known as Latent Semantic Analysis. In contrast to the latter method
which stems from linear algebra and performs a Singular Value Decomposition
of co-occurrence tables, the proposed technique uses a generative
latent class model to perform a probabilistic mixture decomposition.
This results in a more principled approach with a solid foundation
in statistical inference. More precisely, we propose to make use
of a temperature controlled version of the Expectation Maximization
algorithm for model fitting, which has shown excellent performance
in practice. Probabilistic Latent Semantic Analysis has many applications,
most prominently in information retrieval, natural language processing,
machine learning from text, and in related areas. The paper presents
perplexity results for different types of text and linguistic data
collections and discusses an application in automated document indexing.
The experiments indicate substantial and consistent improvements
of the probabilistic method over standard Latent Semantic Analysis.}
}
@INPROCEEDINGS{Hofmann1999,
AUTHOR = {Hofmann, Thomas},
TITLE = {The Cluster-Abstraction Model: Unsupervised Learning of Topic Hierarchies from
Text Data},
BOOKTITLE = {IJCAI 99},
YEAR = {1999},
ABSTRACT = {This paper presents a novel statistical latent class model for text
mining and interactive information access. The described learning
architecture, called Cluster--Abstraction Model (CAM), is purely
data driven and utilizes context-specific word occurrence statistics.
In an intertwined fashion, the CAM extracts hierarchical relations
between groups of documents as well as an abstractive organization
of keywords. An annealed version of the Expectation--Maximization
(EM) algorithm for maximum...}
}
@INPROCEEDINGS{Hofmann1999sigir,
AUTHOR = {Thomas Hofmann},
TITLE = {Probabilistic latent semantic indexing},
BOOKTITLE = {the 22nd annual international ACM SIGIR conference on Research and
development in information retrieval},
YEAR = {1999},
PAGES = {50--57},
ADDRESS = {Berkeley, California, United States},
PUBLISHER = {ACM Press, New York, NY, USA}
}
@INPROCEEDINGS{Hofmann1999uai,
AUTHOR = {Hofmann, Thomas},
TITLE = {Probabilistic Latent Semantic Analysis},
BOOKTITLE = {Uncertainty in Artificial Intelligence},
ADDRESS = {Stockholm},
YEAR = {1999},
ABSTRACT = {Probabilistic Latent Semantic Analysis is a novel statistical technique
for the analysis of two--mode and co-occurrence data, which has
applications in information retrieval and filtering, natural language
processing, machine learning from text, and in related areas. Compared
to standard Latent Semantic Analysis which stems from linear algebra
and performs a Singular Value Decomposition of co-occurrence tables,
the proposed method is based on a mixture decomposition derived
from a latent class...}
}
@ARTICLE{hofmeyr98intrusion,
AUTHOR = {Steven A. Hofmeyr and Stephanie Forrest and Anil Somayaji},
TITLE = {Intrusion Detection Using Sequences of System Calls},
JOURNAL = {Journal of Computer Security},
YEAR = {1998},
VOLUME = {6},
PAGES = {151--180},
NUMBER = {3},
URL = {http://citeseer.ist.psu.edu/hofmeyr98intrusion.html}
}
@INPROCEEDINGS{Hotho2003,
AUTHOR = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
TITLE = {WordNet improves text document clustering},
BOOKTITLE = {Proc. of the SIGIR 2003 Semantic Web Workshop},
YEAR = {2003}
}
@ARTICLE{Huang1994,
AUTHOR = {Cecil Huang and Adnan Darwiche},
TITLE = {Inference in Belief Networks: A Procedural Guide},
JOURNAL = {International Journal of Approximate Reasoning},
YEAR = {1994},
VOLUME = {15},
PAGES = {225--263},
NUMBER = {3},
ABSTRACT = {Belief networks are popular tools for encoding uncertainty in expert
systems. These networks rely on inference algorithms to compute
beliefs in the context of observed evidence. One established method
for exact inference on belief networks is the Probability Propagation
in Trees of Clusters (PPTC) algorithm, as developed by Lauritzen
and Spiegelhalter and refined by Jensen et al. PPTC converts the
belief network into a secondary structure, then computes probabilities
by manipulating the...}
}
@ARTICLE{Huang2003,
AUTHOR = {Xiaoqiu Huang and Kun-Mao Chao},
TITLE = {A generalized global alignment algorithm},
JOURNAL = {Bioinformatics},
YEAR = {2003},
VOLUME = {19},
PAGES = {228--233},
NUMBER = {2},
ABSTRACT = {Motivation: Homologous sequences are sometimes similar over some regions
but different over other regions. Homologous sequences have a much
lower global similarity if the different regions are much longer
than the similar regions. Results: We present a generalized global
alignment algorithm for comparing sequences with intermittent similarities,
an ordered list of similar regions separated by different regions.
A generalized global alignment model is defined to handle sequences
with intermittent similarities. A dynamic programming algorithm
is designed to compute an optimal general alignment in time proportional
to the product of sequence lengths and in space proportional to
the sum of sequence lengths. The algorithm is implemented as a computer
program named GAP3 (Global Alignment Program Version 3). The generalized
global alignment model is validated by experimental results produced
with GAP3 on both DNA and protein sequences. The GAP3 program extends
the ability of standard global alignment programs to recognize homologous
sequences of lower similarity. The GAP3 program is freely available
for academic use at http://bioinformatics.iastate.edu/aat/align/align.html.}
}
@INPROCEEDINGS{Indyk1999,
AUTHOR = {Indyk, Piotr},
TITLE = {Sublinear Time Algorithms for Metric Space Problems},
BOOKTITLE = {STOC 99},
PAGES = {428--434},
YEAR = {1999}
}
@INPROCEEDINGS{Jaakkola1999,
AUTHOR = {Tommi Jaakkola and Marina Meila and Tony Jebara},
TITLE = {Maximum entropy discrimination},
BOOKTITLE = {NIPS 1999},
YEAR = {1999},
PAGES = {470--476},
ABSTRACT = {We present a general framework for discriminative estimation based
on the maximum entropy principle and its extensions. All calculations
involve...}
}
@ARTICLE{Jeffreys1946,
AUTHOR = {Jeffreys, H.},
TITLE = {An invariant form for the prior probability in estimation procedures},
JOURNAL = {Proceedings of the Royal Society of London. Series A, Mathematical
and Physical Sciences},
YEAR = {1946},
VOLUME = {186},
PAGES = {453--461},
ADDRESS = {London, UK}
}
@INPROCEEDINGS{Jensen2002,
AUTHOR = {Jensen, David and Neville, Jennifer},
TITLE = {Linkage and Autocorrelation Cause Feature Selection Bias in Relational Learning},
BOOKTITLE = {ICML '02: Proceedings of the Nineteenth International Conference on
Machine Learning},
PUBLISHER = {Morgan Kaufmann Publishers Inc.},
ADDRESS = {San Francisco, CA, USA},
PAGES = {259--266},
YEAR = {2002},
ISBN = {1-55860-873-7}
}
@ARTICLE{Jin2003,
AUTHOR = {Lixia Jin and Weiwu Fang and Huanwen Tang},
TITLE = {Prediction of protein structural classes by a new measure of information
discrepancy},
JOURNAL = {Computational Biology and Chemistry},
YEAR = {2003},
VOLUME = {27},
PAGES = {373--380},
NUMBER = {3}
}
@INPROCEEDINGS{joachims98text,
AUTHOR = {Thorsten Joachims},
TITLE = {Text categorization with support vector machines: learning with many
relevant features},
BOOKTITLE = {Proceedings of {ECML}-98, 10th European Conference on Machine Learning},
YEAR = {1998},
EDITOR = {Claire N{\'e}dellec and C{\'e}line Rouveirol},
PAGES = {137--142},
ADDRESS = {Chemnitz, DE},
PUBLISHER = {Springer Verlag, Heidelberg, DE},
URL = {http://citeseer.ist.psu.edu/joachims97text.html}
}
@INPROCEEDINGS{John95,
AUTHOR = {George John and Pat Langley},
TITLE = {Estimating Continuous Distributions in {Bayesian} Classifiers},
BOOKTITLE = {Proceedings of the 11th Annual Conference on Uncertainty in Artificial
Intelligence (UAI-95)},
YEAR = {1995},
PAGES = {338--345},
ADDRESS = {San Francisco, CA},
PUBLISHER = {Morgan Kaufmann Publishers}
}
@INPROCEEDINGS{Jones2001,
AUTHOR = {Jones, A. and Li, S.},
TITLE = {Temporal Signatures for Intrusion Detection},
BOOKTITLE = {ACSAC '01: Proceedings of the 17th Annual Computer Security
Applications Conference},
PUBLISHER = {IEEE Computer Society},
ADDRESS = {Washington, DC, USA},
PAGES = {252},
YEAR = {2001},
ISBN = {0-7695-1405-7}
}
@INPROCEEDINGS{Kamvar2003,
AUTHOR = {Kamvar, Sepandar and Schlosser, Mario and Garcia-Molina, Hector},
TITLE = {EigenRep: Reputation Management in P2P Networks},
BOOKTITLE = {the 12th International World Wide Web Conference},
ADDRESS = {Budapest, Hungary},
YEAR = {2003}
}
@INPROCEEDINGS{Kandola2002,
AUTHOR = {Kandola, Jaz and Shawe-Taylor, John and Cristianini, Nello},
TITLE = {Learning semantic similarity},
BOOKTITLE = {NIPS 2002},
VOLUME = {15},
YEAR = {2002}
}
@INPROCEEDINGS{Kang2003ismis,
AUTHOR = {Dae-Ki Kang and Joongmin Choi},
TITLE = {{MetaNews}: An Information Agent for Gathering News Articles on the
Web},
BOOKTITLE = {Foundations of Intelligent Systems, 14th International Symposium,
{ISMIS} 2003, Maebashi City, Japan, October 28--31, 2003, Proceedings},
YEAR = {2003},
EDITOR = {Ning Zhong and Zbigniew W. Ras and Shusaku Tsumoto and Einoshin Suzuki},
VOLUME = {2871},
SERIES = {Lecture Notes in Computer Science},
PAGES = {179--186},
PUBLISHER = {Springer}
}
@INPROCEEDINGS{KangICCS1997,
AUTHOR = {Dae-Ki Kang and Yun-Koo Chung and Woong-Rok Doh},
TITLE = {One-to-many template matching for automated visual inspection},
BOOKTITLE = {Poster session of the First International Conference on Cognitive
Science},
YEAR = {1997},
ADDRESS = {Seoul, Korea},
MONTH = aug,
OWNER = {dkkang},
TIMESTAMP = {2006.07.10}
}
@ARTICLE{KangIJMTM1999,
AUTHOR = {Dae-Ki Kang and Yun-Koo Chung and Woong-Rok Doh and Won Jung and
Sang-Bong Park},
TITLE = {Applying object modelling technique to automated visual inspection
of automotive compressor parts omission},
JOURNAL = {International Journal of Machine Tools and Manufacture},
YEAR = {1999},
VOLUME = {39},
PAGES = {1779--1792},
NUMBER = {11},
MONTH = aug,
OWNER = {dkkang},
TIMESTAMP = {2006.07.10}
}
@INPROCEEDINGS{KangICSC1997,
AUTHOR = {Dae-Ki Kang and Yun-Koo Chung and Won Jung and Woong-Rok Doh and
Sang-Bong Park},
TITLE = {Automated visual inspection of automotive evaporator core using one-to-many
template matching},
BOOKTITLE = {Proceedings of the Second International ICSC Symposium on Intelligent
Industrial Automation},
YEAR = {1997},
ADDRESS = {Nimes, France},
MONTH = sep,
OWNER = {dkkang},
TIMESTAMP = {2006.07.10}
}
@INPROCEEDINGS{dkkang2005isi,
AUTHOR = {Dae-Ki Kang and Doug Fuller and Vasant Honavar},
TITLE = {Learning Classifiers for Misuse Detection Using a Bag of System Calls
Representation},
BOOKTITLE = {Proceedings of {IEEE} International Conference on Intelligence and
Security Informatics ({ISI}-2005)},
YEAR = {2005},
VOLUME = {3495},
SERIES = {Lecture Notes in Computer Science},
PAGES = {511--516},
ADDRESS = {Atlanta, GA, USA},
MONTH = may,
PUBLISHER = {Springer-Verlag}
}
@INPROCEEDINGS{Kang2005iaw,
AUTHOR = {Kang, Dae-Ki and Fuller, Doug and Honavar, Vasant},
TITLE = {Learning Classifiers for Misuse and Anomaly Detection Using a Bag of
System Calls Representation},
BOOKTITLE = {Proceedings of 6th IEEE Systems Man and Cybernetics Information
Assurance Workshop (IAW)},
ADDRESS = {West Point, NY, USA},
YEAR = {2005}
}
@INPROCEEDINGS{KangWebnet1997,
AUTHOR = {Dae-Ki Kang and Joong-Bae Kim and Ho-Sang Ham},
TITLE = {HANMAUM - a multi-agent model for customer, merchant, and directory
service},
BOOKTITLE = {Proceedings of the Second World Conference of the WWW, Internet,
Intranet},
YEAR = {1997},
ADDRESS = {Toronto, Canada},
MONTH = oct,
OWNER = {dkkang},
TIMESTAMP = {2006.07.10}
}
@INPROCEEDINGS{KangINET1997,
AUTHOR = {Dae-Ki Kang and Joong-Bae Kim and Joo-Chan Sohn and Ho-Sang Ham},
TITLE = {A world wide web directory service architecture for electronic commerce},
BOOKTITLE = {Proceedings of the Seventh Annual Conference of Internet Society},
YEAR = {1997},
ADDRESS = {Kuala Lumpur, Malaysia},
MONTH = jun,
OWNER = {dkkang},
TIMESTAMP = {2006.07.10}
}
@INPROCEEDINGS{dkkang2006Recursive,
AUTHOR = {Dae-Ki Kang and Adrian Silvescu and Vasant Honavar},
TITLE = {{RNBL-MN}: A Recursive Naive Bayes Learner for Sequence Classification},
BOOKTITLE = {10th Pacific-Asia Conference on Knowledge Discovery and Data Mining
(PAKDD 2006)},
YEAR = {2006},
VOLUME = {3918},
SERIES = {Lecture Notes in Artificial Intelligence},
ADDRESS = {Singapore},
MONTH = apr,
PUBLISHER = {Springer Verlag}
}
@INPROCEEDINGS{dkkang2004kdo,
AUTHOR = {Dae-Ki Kang and Adrian Silvescu and Jun Zhang and Vasant Honavar},
TITLE = {Generation of Attribute Value Taxonomies from Data and Their Use
in Data-Driven Construction of Accurate and Compact Naive Bayes
Classifiers},
BOOKTITLE = {Proceedings of {ECML/PKDD}-2004 Knowledge Discovery and Ontologies
Workshop ({KDO}-2004)},
YEAR = {2004},
ADDRESS = {Pisa, Italy},
MONTH = sep
}
@INPROCEEDINGS{Kang2004icdm,
AUTHOR = {Dae-Ki Kang and Adrian Silvescu and Jun Zhang and Vasant Honavar},
TITLE = {Generation of Attribute Value Taxonomies from Data for Data-Driven
Construction of Accurate and Compact Classifiers},
BOOKTITLE = {Proceedings of the 4th IEEE International Conference on Data Mining
(ICDM 2004), 1--4 November 2004, Brighton, UK},
YEAR = {2004},
PAGES = {130--137},
BIBSOURCE = {DBLP, http://dblp.uni-trier.de},
EE = {http://csdl.computer.org/comp/proceedings/icdm/2004/2142/00/21420130abs.htm}
}
@INPROCEEDINGS{Kang2005sara,
AUTHOR = {Dae-Ki Kang and Jun Zhang and Adrian Silvescu and Vasant Honavar},
TITLE = {Multinomial Event Model Based Abstraction for Sequence and Text Classification},
BOOKTITLE = {Abstraction, Reformulation and Approximation, 6th International Symposium,
{SARA} 2005, Edinburgh, Scotland, UK, July 26--29, 2005, Proceedings},
YEAR = {2005},
SERIES = {Lecture Notes in Computer Science},
PAGES = {134--148},
PUBLISHER = {Springer}
}
@INPROCEEDINGS{Karger1997,
AUTHOR = {David Karger and Eric Lehman and Tom Leighton and Matthew Levine
and Daniel Lewin and Rina Panigrahy},
TITLE = {Consistent Hashing and Random Trees: Distributed Caching Protocols
for Relieving Hot Spots on the World Wide Web},
BOOKTITLE = {ACM Symposium on Theory of Computing},
YEAR = {1997},
PAGES = {654--663},
ABSTRACT = {We describe a family of caching protocols for distributed networks
that can be used to decrease or eliminate the occurrence of hot
spots in the network. Our protocols are particularly designed for
use with very large networks such as the Internet, where delays
caused by hot spots can be severe, and where it is not feasible
for every server to have complete information about the current
state of the entire network. The protocols are easy to implement
using existing network protocols such as...}
}
@inproceedings{Karger1999,
  author    = {Karger, David and Sherman, Alex and Berkheimer, Andy and Bogstad, Bill
               and Dhanidina, Rizwan and Iwamoto, Ken and Kim, Brian and Matkins, Luke
               and Yerushalmi, Yoav},
  title     = {Web Caching with Consistent Hashing},
  booktitle = {the eighth international conference on World Wide Web},
  year      = {1999},
  pages     = {1203--1213},
  address   = {Toronto, Canada},
  abstract  = {A key performance measure for the World Wide Web is the speed with
               which content is served to users. As traffic on the Web increases,
               users are faced with increasing delays and failures in data delivery.
               Web caching is one of the key strategies that has been explored
               to improve performance. An important issue in many caching systems
               is how to decide what is cached where at any given time. Solutions
               have included multicast queries and directory schemes. In this paper,
               we offer a new web caching strategy based on consistent hashing.
               Consistent hashing provides an alternative to multicast and directory
               schemes, and has several other advantages in load balancing and
               fault tolerance. Its performance was analyzed theoretically in previous
               work; in this paper we describe the implementation of a consistent-hashing
               based system and experiments that support our thesis that it can
               provide performance improvements.},
}
@inproceedings{Kearns1993,
  author    = {Kearns, Michael},
  title     = {Efficient Noise-Tolerant Learning From Statistical Queries},
  booktitle = {the Twenty-Fifth Annual ACM Symposium on Theory of Computing},
  year      = {1993},
  pages     = {392--401},
}
@article{Kearns1997,
  author    = {Kearns, Michael and Mansour, Yishay and Ng, Andrew Y. and Ron, Dana},
  title     = {An Experimental and Theoretical Comparison of Model Selection Methods},
  journal   = {Machine Learning},
  year      = {1997},
  volume    = {27},
  pages     = {7--50},
  abstract  = {We investigate the problem of model selection in the setting of supervised
               learning of boolean functions from independent random examples.
               More precisely, we compare methods for finding a balance between
               the complexity of the hypothesis chosen and its observed error on
               a random training sample of limited size, when the goal is that
               of minimizing the resulting generalization error. We undertake a
               detailed comparison of three well-known model selection methods ---
               a variation of Vapnik's Guaranteed Risk Minimization (GRM), an instance
               of Rissanen's Minimum Description Length Principle (MDL), and (hold-out)
               cross validation (CV). We introduce a general class of model selection
               methods (called penalty-based methods) that includes both GRM and
               MDL, and provide general methods for analyzing such rules. We provide
               both controlled experimental evidence and formal theorems to support
               the following conclusions:},
}
@incollection{Kercel2005,
  author    = {Kercel, S. W. and {Bach-y-Rita}, P.},
  title     = {Non-Invasive Coupling of Electronically Generated Data Into the Human
               Nervous System},
  booktitle = {Wiley Encyclopedia of Biomedical Engineering},
  editor    = {Akay, Metin},
  publisher = {Wiley},
  year      = {2005},
  note      = {In Press},
}
@article{King1995,
  author    = {King, R. D. and Srinivasan, A. and Sternberg, M. J. E.},
  title     = {Relating chemical activity to structure: an examination of {ILP} successes},
  journal   = {New Generation Computing},
  year      = {1995},
  volume    = {13},
  number    = {3,4},
  pages     = {411--433},
}
@article{Kleinberg1999,
  author    = {Kleinberg, Jon M.},
  title     = {Authoritative sources in a hyperlinked environment},
  journal   = {Journal of the ACM},
  volume    = {46},
  number    = {5},
  pages     = {604--632},
  year      = {1999},
}
@inproceedings{ecmlKlimtY04,
  author    = {Klimt, Bryan and Yang, Yiming},
  title     = {The {Enron} Corpus: A New Dataset for Email Classification Research},
  booktitle = {15th European Conference on Machine Learning (ECML 2004)},
  series    = {Lecture Notes in Computer Science},
  volume    = {3201},
  publisher = {Springer-Verlag},
  year      = {2004},
  month     = sep,
  pages     = {217--226},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee        = {http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=3201{\&}spage=217},
}
@inproceedings{DBLP:conf/pkdd/KnobbeSM02,
  author    = {Knobbe, Arno J. and Siebes, Arno and Marseille, Bart},
  title     = {Involving Aggregate Functions in Multi-relational Search},
  booktitle = {Principles of Data Mining and Knowledge Discovery, 6th European Conference,
               PKDD 2002, Helsinki, Finland, August 19--23, 2002, Proceedings},
  editor    = {Elomaa, Tapio and Mannila, Heikki and Toivonen, Hannu},
  series    = {Lecture Notes in Computer Science},
  volume    = {2431},
  publisher = {Springer},
  year      = {2002},
  pages     = {287--298},
  isbn      = {3-540-44037-2},
  ee        = {http://link.springer.de/link/service/series/0558/bibs/2431/24310287.htm},
}
@article{Knoblock2000,
  author    = {Knoblock, Craig A. and Lerman, Kristina and Minton, Steven and Muslea, Ion},
  title     = {Accurately and Reliably Extracting Data from the Web: A Machine Learning
               Approach},
  journal   = {IEEE Data Engineering Bulletin},
  year      = {2000},
  volume    = {23},
  number    = {4},
  pages     = {33--41},
  abstract  = {A critical problem in developing information agents for the Web is
               accessing data that is formatted for human use. We have developed
               a set of tools for extracting data from web sites and transforming
               it into a structured data format, such as XML. The resulting data
               can then be used to build new applications without having to deal
               with unstructured data. The advantages of our wrapping technology
               over previous work are the ability to learn highly accurate
               extraction rules, to verify the...},
}
@inproceedings{Knoblock1998,
  author    = {Knoblock, Craig A. and Minton, Steven and Ambite, Jose Luis and
               Ashish, Naveen and Modi, Pragnesh Jay and Muslea, Ion and
               Philpot, Andrew G. and Tejada, Sheila},
  title     = {Modeling Web Sources for Information Integration},
  booktitle = {Fifteenth National Conference on Artificial Intelligence},
  year      = {1998},
  abstract  = {The Web is based on a browsing paradigm that makes it difficult to
               retrieve and integrate data from multiple sites. Today, the only
               way to do this is to build specialized applications, which are time-consuming
               to develop and difficult to maintain. We are addressing this problem
               by creating the technology and tools for rapidly constructing information
               agents that extract, query, and integrate data from web sources.
               Our approach is based on a simple, uniform representation that makes
               it efficient ...},
}
@inproceedings{Knorr1998,
  author    = {Knorr, Edwin M. and Ng, Raymond T.},
  title     = {Algorithms for Mining Distance-Based Outliers in Large Datasets},
  booktitle = {24th Int. Conf. Very Large Data Bases, VLDB},
  pages     = {392--403},
  year      = {1998},
  abstract  = {This paper deals with finding outliers (exceptions) in large, multidimensional
               datasets. The identification of outliers can lead to the discovery
               of truly unexpected knowledge in areas such as electronic commerce,
               credit card fraud, and even the analysis of performance statistics
               of professional athletes. Existing methods that we have seen for
               finding outliers in large datasets can only deal efficiently with
               two dimensions/attributes of a dataset. Here, we study the notion
               of DB- (Distance-...},
}
@inproceedings{Koenig2004,
  author    = {Koenig, Nathan and Howard, Andrew},
  title     = {Design and Use Paradigms for {Gazebo}, An Open-Source Multi-Robot Simulator},
  booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems
               (IROS)},
  year      = {2004},
  month     = sep,
  pages     = {2149--2154},
  address   = {Sendai, Japan},
  owner     = {DK},
  timestamp = {2006.03.06},
}
@inproceedings{kohavi96scaling,
  author    = {Kohavi, Ron},
  title     = {Scaling Up the Accuracy of {Naive-Bayes} Classifiers: A Decision-Tree
               Hybrid},
  booktitle = {Proceedings of the Second International Conference on Knowledge Discovery
               and Data Mining},
  year      = {1996},
  pages     = {202--207},
}
@article{Kohavi2001,
  author    = {Kohavi, Ron and Provost, Foster},
  title     = {Applications of Data Mining to Electronic Commerce},
  journal   = {Data Mining and Knowledge Discovery},
  publisher = {Kluwer Academic Publishers},
  volume    = {5},
  number    = {1-2},
  pages     = {5--10},
  year      = {2001},
  issn      = {1384-5810},
}
@inproceedings{Koller2001,
  author    = {Koller, Daphne and Milch, Brian},
  title     = {Multi-Agent Influence Diagrams for Representing and Solving Games},
  booktitle = {17th International Joint Conference on Artificial Intelligence (IJCAI)},
  year      = {2001},
  pages     = {1027--1034},
  abstract  = {The traditional representations of games using the extensive form
               or the strategic (normal) form obscure much of the structure that
               is present in real-world games. In this paper, we propose a new
               representation language for general multi-player games -- multi-agent
               influence diagrams (MAIDs). This representation extends graphical
               models for probability distributions to a multi-agent decision-making
               context. MAIDs explicitly encode structure involving the dependence
               relationships among variables. As a consequence, we can define a
               notion of strategic relevance of one decision variable to another:
               D' is strategically relevant to D if, to optimize the decision rule
               at D, the decision maker needs to take into consideration the decision
               rule at D'. We provide a sound and complete graphical criterion
               for determining strategic relevance. We then show how strategic
               relevance can be used to detect structure in games, allowing a large
               game to be broken up into a set of interacting smaller games, which
               can be solved in sequence. We show that this decomposition can lead
               to substantial savings in the computational cost of finding Nash
               equilibria in these games.},
}
@inproceedings{Koller1997,
  author    = {Koller, Daphne and Pfeffer, Avi},
  title     = {Object-oriented {Bayesian} networks},
  booktitle = {the 13th Annual Conference on Uncertainty in AI (UAI)},
  year      = {1997},
  pages     = {302--313},
  address   = {Providence, Rhode Island},
  abstract  = {Bayesian networks provide a modeling language and associated inference
               algorithm for stochastic domains. They have been successfully applied
               in a variety of medium-scale applications. However, when faced with
               a large complex domain, the task of modeling using Bayesian networks
               begins to resemble the task of programming using logical circuits.
               In this paper, we describe an object-oriented Bayesian network (OOBN)
               language, which allows complex domains to be described in terms
               of inter-related objects. We use a Bayesian network fragment to
               describe the probabilistic relations between the attributes of an
               object. These attributes can themselves be objects, providing a
               natural framework for encoding part-of hierarchies. Classes are
               used to provide a reusable probabilistic model which can be applied
               to multiple similar objects. Classes also support inheritance of
               model fragments from a class to a subclass, allowing the common
               aspects of related classes to be defined only once. Our language
               has clear declarative semantics: an OOBN can be interpreted as a
               stochastic functional program, so that it uniquely specifies a probabilistic
               model. We provide an inference algorithm for OOBNs, and show that
               much of the structural information encoded by an OOBN---particularly
               the encapsulation of variables within an object and the reuse of
               model fragments in different contexts---can also be used to speed
               up the inference process.},
}
@inproceedings{Krishnapuram2003,
  author    = {Krishnapuram, Raghu and Chitrapura, Krishna Prasad and Joshi, Sachindra},
  title     = {Classification of Text Documents Based on Minimum System Entropy},
  booktitle = {ICML 2003},
  year      = {2003},
  pages     = {384--391},
  abstract  = {In this paper, we describe a new approach to classification of text
               documents based on the minimization of system entropy, i.e., the
               overall uncertainty associated with the joint distribution of words
               and labels in the collection. The classification algorithm assigns
               a class label to a new document in such a way that its insertion
               into the system results in the maximum decrease (or least increase)
               in system entropy. We provide insights into the minimum system entropy
               criterion, and establish connections to traditional naive Bayes
               approaches. Experimental results indicate that the algorithm performs
               well in terms of classification accuracy. It is less sensitive to
               feature selection and more scalable when compared with SVM.},
}
@inproceedings{KruegelKMRV05,
  author    = {Kr{\"u}gel, Christopher and Kirda, E. and Mutz, D. and Robertson, W.
               and Vigna, G.},
  title     = {Automating Mimicry Attacks Using Static Binary Analysis},
  booktitle = {Proceedings of Security~'05, the 14th USENIX Security Symposium},
  address   = {Baltimore, MD, USA},
  pages     = {161--176},
  year      = {2005},
  abstract  = {Intrusion detection systems that monitor sequences of system calls
               have recently become more sophisticated in defining legitimate application
               behavior. In particular, additional information, such as the value
               of the program counter and the configuration of the program's call
               stack at each system call, has been used to achieve better characterization
               of program behavior. While there is common agreement that this additional
               information complicates the task for the attacker, it is less clear
               to which extent an intruder is constrained. In this paper, we present
               a novel technique to evade the extended detection features of state-of-the-art
               intrusion detection systems and reduce the task of the intruder
               to a traditional mimicry attack. Given a legitimate sequence of
               system calls, our technique allows the attacker to execute each
               system call in the correct execution context by obtaining and relinquishing
               the control of the application's execution flow through manipulation
               of code pointers. We have developed a static analysis tool for Intel
               x86 binaries that uses symbolic execution to automatically identify
               instructions that can be used to redirect control flow and to compute
               the necessary modifications to the environment of the process. We
               used our tool to successfully exploit three vulnerable programs
               and evade detection by existing state-of-the-art system call monitors.
               In addition, we analyzed three real-world applications to verify
               the general applicability of our techniques.},
}
@inproceedings{kruegel03:syscalls,
  author    = {Kr{\"u}gel, Christopher and Mutz, D. and Valeur, F. and Vigna, G.},
  title     = {On the Detection of Anomalous System Call Arguments},
  booktitle = {Proceedings of the 2003 European Symposium on Research in Computer
               Security},
  year      = {2003},
  month     = oct,
  address   = {Gj{\o}vik, Norway},
}
@inproceedings{DBLP:conf/acsac/KruegelMRV03,
  author    = {Kr{\"u}gel, Christopher and Mutz, Darren and Robertson, William and
               Valeur, Fredrik},
  title     = {{Bayesian} Event Classification for Intrusion Detection},
  booktitle = {19th Annual Computer Security Applications Conference (ACSAC 2003),
               8--12 December 2003, Las Vegas, NV, USA},
  publisher = {IEEE Computer Society},
  year      = {2003},
  pages     = {14--23},
  isbn      = {0-7695-2041-3},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee        = {http://csdl.computer.org/comp/proceedings/acsac/2003/2041/00/20410014abs.htm},
}
@article{Kullback1951,
  author    = {Kullback, S. and Leibler, R. A.},
  title     = {On information and sufficiency},
  journal   = {The Annals of Mathematical Statistics},
  year      = {1951},
  volume    = {22},
  pages     = {79--86},
}
@article{Kushilevitz1993,
  author    = {Kushilevitz, Eyal and Mansour, Yishay},
  title     = {Learning Decision Trees using the {Fourier} Spectrum},
  journal   = {SIAM Journal on Computing},
  year      = {1993},
  volume    = {22},
  number    = {6},
  pages     = {1331--1348},
}
@inproceedings{Kushmerick1997,
  author    = {Kushmerick, Nickolas and Weld, Daniel S. and Doorenbos, Robert B.},
  title     = {Wrapper induction for information extraction},
  booktitle = {Intl. Joint Conference on Artificial Intelligence (IJCAI)},
  pages     = {729--737},
  year      = {1997},
}
@article{Lam1994,
  author    = {Lam, Wai and Bacchus, Fahiem},
  title     = {Learning {Bayesian} Belief Networks: An Approach Based on the {MDL} Principle},
  journal   = {Computational Intelligence},
  year      = {1994},
  volume    = {10},
  pages     = {269--293},
}
@INPROCEEDINGS{Landwehr200