Friday, May 23, 2008

Generate SQL INSERT Statements for a SS2005 Table

For quite a while, I have been looking for a utility that would be able to take a large table stored in SQL Server 2005, and generate INSERT statements. I recently tried a few SQL scripts that I found on the web. Sometimes they worked, sometimes they didn't. But, they all had a problem in the fact that the generated statements were dumped to a SQL Results pane. If you had a very large table, then you had trouble cutting and pasting these statements to your favorite text editor.

I finally got sick and tired of this effort and the limitations that I found. So, in my own spare time over last weekend, I dropped into developer mode and wrote a little C# utility that did what I wanted.

The code below was written very quickly. I didn't use StringBuilder, and I did not try to optimize. This is quick and dirty ... and it works just great.

You can use this code for personal use. You cannot reprint this code anywhere without my permission. Feel free to contact me if you have any enhancements.

Sorry for some of the formatting stuff. In particular, the nice line spacing that I use will not show up on the web page.

-marc







using System;
using System.Data;
using System.Data.SqlClient;
using System.Globalization;
using System.IO;

namespace SQLGenerateInsertStatements
{
/// <summary>
/// This was written by Marc Adler (magmasystems at yahoo dot com).
/// This code can be used freely for your own personal use.
/// It may not be reprinted anywhere without permission of Marc Adler.
/// </summary>
static class Program
{
[STAThread]
static void Main(string[] args)
{
string databaseName = null;
string tableName = null;
string connectionString = "Data Source=(local);Initial Catalog={d};Integrated Security=True";
string outputFile = @"c:\GeneratedStatements.sql";

if (args == null args.Length == 0)
{
Usage();
return;
}

for (int i = 0; i < args.Length; i++)
{
string arg = args[i].ToLower();
if (arg == "-table")
{
tableName = args[++i];
}
else if (arg == "-database")
{
databaseName = args[++i];
}
else if (arg == "-output")
{
outputFile = args[++i];
}
else if (arg == "-connectionstring")
{
connectionString = args[++i];
}
else
{
Usage();
return;
}
}

if (string.IsNullOrEmpty(tableName) string.IsNullOrEmpty(databaseName))
{
Console.WriteLine("The table name or the database name was not specified.\n");
Usage();
return;
}

if (connectionString.Contains("{d}"))
connectionString = connectionString.Replace("{d}", databaseName);

try
{
GetData(databaseName, tableName, connectionString, outputFile);
}
catch (Exception exc)
{
Console.WriteLine(exc.Message);
}
}

static private void Usage()
{
Console.WriteLine("SQLGenerateInsertStatements [-help] [-table <tablename>] [-database <databasename>] [-connectionstring <connstring>] [-output <outputfile>]");
Console.WriteLine(@"The default output file is C:\GeneratedStatements.sql");
Console.WriteLine("If the connection string has '{d}' embedded in it, the '{d}' is replaced with the database name.");
Console.WriteLine("If the outputfile string has '{d}' embedded in it, the '{d}' is replaced with today's date.");
}

static private void GetData(string databaseName, string tableName, string connectionString, string outputFile)
{
// Get the (optional) name of the file to write the SQL statements to
if (outputFile.IndexOf("{d}") >= 0)
{
outputFile = outputFile.Replace("{d}", DateTime.Now.ToString("d", new CultureInfo("de-DE")));
}
StreamWriter outputStream = new StreamWriter(outputFile);

// Write the "USE database" statement
outputStream.WriteLine(string.Format("USE [{0}]", databaseName));

using (SqlConnection connection = new SqlConnection(connectionString))
{
using (SqlCommand command = new SqlCommand())
{
// Initialize the SQL Connection
command.Connection = connection;
command.CommandText = "SELECT * FROM " + tableName;
connection.Open();

// Get a DataReader
using (SqlDataReader reader = command.ExecuteReader(CommandBehavior.CloseConnection))
{
// Format the first part of the INSERT statement. This part remains
// constant for every row that is generated.
string sInsert = string.Format("INSERT INTO [{0}] ( ", tableName);
for (int iCol = 0; iCol < reader.FieldCount; iCol++)
{
sInsert += string.Format("[{0}]", reader.GetName(iCol));
if (iCol < reader.FieldCount - 1)
sInsert += ",";
}
sInsert += ") VALUES ({0})";

// Read each row of the table
object[] objs = new object[reader.FieldCount];
int nLines = 1;

while (reader.Read())
{
int n = reader.GetValues(objs);
string sValues = string.Empty;

// Go through each column of the row, and generate a string
for (int i = 0; i < n; i++)
{
try
{
string sVal = (reader.IsDBNull(i)) ? "null" : ObjectToSQLString(objs[i]);
sValues += sVal;
if (i < n - 1)
sValues += ",";
}
catch (DataException)
{
Console.WriteLine(string.Format("Conversion error in Record {0}, Column {1}", nLines, reader.GetName(i)));
return;
}
}

// Dump the INSERT statement to the file
outputStream.WriteLine(string.Format(sInsert, sValues));
nLines++;
}
}
}
}

outputStream.Flush();
outputStream.Close();
}

static string ObjectToSQLString(object o)
{
if (o == null o == DBNull.Value)
return "null";

Type t = o.GetType();

if (t == typeof (string))
return string.Format("'{0}'", ((string) o).Trim());
if (t == typeof (int))
return ((int) o).ToString();
if (t == typeof (long))
return ((long) o).ToString();
if (t == typeof (float))
return ((float) o).ToString();
if (t == typeof (double))
return ((double) o).ToString();
if (t == typeof (bool))
return ((bool) o).ToString();
if (t == typeof (DateTime))
return string.Format("'{0}'", ((DateTime) o));

throw new DataException();
}
}
}


Thursday, May 15, 2008

Vhayu and Aite redux

Our friend Ross Dubin from Vhayu has sent a reassuring message:

Vhayu paid for a research report to be written by Aite. Aite insists on maintaining neutrality in the research and it includes mention of our direct competitors, Kx and OneTick as well as our indirect competitors who handle real-time data analysis, StreamBase, Apama, Aleri, etc. Sang is going to cover the highlights of the research paper during the webcast. That's where his role ends.

Also, our friends from Aleri sent a note to tell me that they are well aware of the lawsuit surrounding Exegy, and that Aleri is well protected from any negative outcomes.

Good news on both fronts.


©2008 Marc Adler - All Rights Reserved

Wednesday, May 14, 2008

Aleri and Exegy

At last week's Accelerating Wall Street conference, it was interesting to see that Aleri brought Scott Parsons from Exegy as their guest.

It was well publicized in the Wall Street press that Hyperfeed had sued Exegy over alleged misdeeds surrounding a botched merger attempt, something that resulted in Hyperfeed going out of business.

To me, a large lawsuit looming over a company like Exegy can bring into question the continued survival of Exegy, should Hyperfeed prevail. I went to Google in order to see if there was any settlement in this lawsuit, and according to the link below, there was an attempt at a settlement, but nothing ever resulted.

http://sec.edgar-online.com/2008/02/29/0000830122-08-000002/Section8.asp

Evidently, Aleri and Exegy have a relationship. However, if I was at a large bank, and I was presented with an Aleri-Exegy solution, I would really have to do some extreme due diligence in the matter. In fact, until this lawsuit was resolved, I would have to be off my rocker to buy anything with Exegy in it.

Exegy is backed by The Acartha Group. I am wondering if Acartha is looking to add a CEP company to their portfolio of companies. I would not be surprised.


©2008 Marc Adler - All Rights Reserved

Analysts and the Vendors They Cover

I received the following invitation the other day:

WEBCAST INVITATION
The Data Management Challenge of Quantitative Analysis

Vhayu invites you to join industry leaders in market data and analytics for a one-hour webcast hosted by Aite Group.

Speakers include:
Sang Lee, Co-founder & Managing Partner / Aite Group

Jeff Hudson, CEO / Vhayu Technologies
David Wilson, Product Manager / Vhayu Technologies

Vhayu has come up with something that they think is an interesting product. Congrats to Jeff, Ross, and the rest of the gang at Vhayu.

However, what is interesting is the fact that they have enlisted Aite group (one of the founders, no less) to host a publicity event.

We look to companies like Gartner, Tabb, and Aite to give us a fairly objective, unbiased look at the technology landscape in the financial sector. However, the presence of an "impartial" analyst and a vendor always gives me an uneasy feeling.

In the dot com era, there was a company/website called Gomez. This company rated the daily effectiveness of online brokers. In fact, CNBC featured The Gomez Report every morning during the go-go years of the late 90's and early 2000's. However, Gomez had banner ads on their website from the brokers who they were supposedly rating.

My opinion is that, if you are going to position yourself as an analyst, you need to give an air of impartiality, and you need to give full disclosure. What is the relationship between Vhayu and Aite? Is Aite receiving any compensation for hosting a webcast that is being created and sponsored by Vhayu? Will Sang Lee agree to host a similar webcast for Vhayu's competitors? Will Sang Lee mention any of Vhayu's competitors during his opening remarks?

For me, there is a bit of a cloud that hangs over any analyst that chooses to associate himself with a particular company.

I like what the Gartner Group does. When I speak at their conference in September, they will not let me speak about, nor will they allow questions about specific vendors. There are no explicit or implicit vendor endorsements allowed when I am on the podium. If a vendor wants to exhibit at a Gartner conference, then they are confined to the vendor area. And, there is no question that Gartner is getting compensated by the vendors for this.




©2008 Marc Adler - All Rights Reserved

Monday, May 12, 2008

Streaming LINQ

Thanks to readers of this blog, here are two open-source implementations of LINQ for streaming data.

Streaming LINQ

Continuous LINQ


©2008 Marc Adler - All Rights Reserved

Wall Street And Tech Conference Report

Jules and I wandered up to the Grand Hyatt around 10:30 for my panel at the Accelerating Wall Street conference that Wall Street And Technology magazine help on May 8th. There were about 80 to 100 registrants at the conference, but I have a feeling that a decent number of those were vendors, since from the second we arrived in the conference, we were accosted by a number of our old friends, like the guys from RTI, Aleri and BEA.

Malcolm West, who is the "Chief Software Architect" of HSBC, never showed up for the panel, so I was on the stage with analysts from Tabb and Aite. All in all, I would say that the panel went very well. The 35 minutes flew by, and I have a feeling that if we had more time, we could have fielded a lot more questions from the audience.

One of the questions that was asked was the possible marriage between distributed memory cache products and CEP products. I have blogged about this here before. I think that there are definite synergies, but the marriage would involve the CEP engine understanding a high-level C# or Java object. And there you have the problems integrating complex objects with the relational structure that is imposed by Streaming SQL. I have a feeling that the integration would be easier with Esper/Nesper.

Another question was about the use of Open Source products. That led me to talk for a few minutes about Esper, and my thoughts on providing an entire eco-system around the CEP engines. However, I get the feeling that Esper is becoming more and more widely used, as it is easy to download and evaluate and get "under the hood" with.

Thanks to the people from WS&T for inviting me ... and a shout out to Ivy Schmerken, whose work I have been reading since I started on Wall Street in 1986. People have come and gone at that publication, but Ivy is the one constant.

The next time I will be speaking is at the Gartner CEP Conference in September. We are taking bets on whether Opher Etzion can stay awake for an entire presentation without playing with his tie.


©2008 Marc Adler - All Rights Reserved

Sunday, May 04, 2008

On Entitlements, Coral8 CCL, SQL, and LINQ

One of the nice things about having kids that are a little older is that it gives me time to putter around on my laptop while watching the Yankees games on TV. I am not doing day-to-day development in Coral8, having handed that aspect of the project over to HH. However, I wanted to see if the entitlements processing of our system could be done in Coral8, which made logical sense.

A brief recap: Our CEP system takes a number of atomic events, puts them through the Coral8 cruncher, and produces derived events. However, we don’t want everyone to see these derived events. We might have information in a derived event that a Prop Trader should not see, or we might have information about a certain financial sector that should be hidden from someone on a trading desk who does not cover that sector.

In addition, we have different kinds of notification mechanisms (GUIs, message buses, email, chat, SMS, etc) that should be utilized depending on the severity level of an event. We don’t want to send several hundred emails to a trader for informational events. However, we might want to email and SMS a trader if we have a “red alert” type of event.

So, we will turn to a familiar pattern called the Recipient List. This is one of the well-documented patterns in the book Enterprise Integration Patterns. I get a good amount of email that asks me for advice on becoming a trading systems developer. My advice is to run, not walk, to this website and book. Most of this stuff is old hack to experienced trading systems developers, but the use cases (especially the one by Jonathan Simon) is worth its weight in gold.

We have come up with a schema and database of entitlement information that marries our users/groups list, severity levels, notification mechanisms, and derived events. As every derived event gets generated by our CEP system, we want to put it through the “entitlements grinder” and come up with a Recipient List of who can see what information in the message, and how they want to be notified of its occurrence.

This seems to be a perfect task for a CEP engine. It can be just one more additional “enrichment filter” whose input we attach to the output of the derived event stream. The output of this enrichment filter consists of the (possibly modified) derived event and the Recipient List.

As an initial step, we implemented the Recipient List Generator as a single SQL query using SQL Server 2005. It is a single query that consists of 2 inner joins and 2 outer joins. It works fairly well.

When I was watching the Yankees game yesterday, I tried porting this query to Coral8. I could not get any variation of this query to compile properly, and when I tried to decompose the query into 4 streams, I got totally different results that what SQL Server gave me. Ideally, “Streaming SQL” languages should be a superset of SQL92. So, in Coral8, if I mirror each SQL Server table as a Coral8 Window with a “KEEP ALL” property, then I should be able to use my SQL Query directly. I would like to do something like this:

INSERT INTO RecipientListOutputStream
SELECT [my original SQL query]

I have given the guys from Coral8 a homework assignment, and asked them to try to take my query and schema and make it work in Coral8.

So, after a frustrating two hours in which I tried to decipher the Coral8 reference documentation and compiler, I decided to turn to another strategy. For shits-and-giggles, I decided to try to write my SQL query in LINQ. I downloaded the experimental Visual LINQ Query Builder from http://code.msdn.microsoft.com/vlinq. I created a new Visual Studio project, pointed the LINQ data sources to the entitlements database, and started plugging away on the VLINQ. In about ten minutes, I had a full LINQ query that implemented my SQL Server query.

(Note: VLINQ was fairly slow on my laptop, and I soon gave up on it, preferring to code the query in LINQ myself. However, Coral8 and other CEP vendors should look at it as a prototype of a visual code generator.)

LINQ has a lot of goodness to it. LINQ is pervasive, and all flavors of LINQ are being developed. I can very well imagine that Microsoft is looking at versions of LINQ that could handle streaming data. Right now, I think that it would be fairly easy to hook up LINQ queries in a pipeline that would handle simple queries on streaming data. Adding streaming SQL constructs is very doable.

If Microsoft was to come out with a Streaming LINQ that is available as part of .NET, how would this affect the world of CEP? An immediate casualty might be NEsper, but that’s OK, since NEsper is just Aaron’s side project right now. But, longer term, I think that a combination of WCF, Streaming LINQ, and a version of Microsoft Analysis Services that was further geared to real-time streams would be a killer to the rest of the CEP industry. (Of course, technology is one thing. Getting all of those Java and Linux bigots over to .Net is another thing.)

©2008 Marc Adler - All Rights Reserved

Friday, May 02, 2008

Tibco EMS and WCF

It looks like Tibco is about to announce WCF support for EMS.

Now, *this* is truly exciting news!

Our framework has an entire hand-written communications layer that allows us to communicate over TCP/IP, EMS, various market data systems, MSMQ, etc. We would like nothing more that to gut this layer and replace it with WCF. The missing ingredient has always been EMS support. Now, hopefully, this will be rectified, and hopefully Tibco will offer it to existing EMS customers at no charge.


©2008 Marc Adler - All Rights Reserved

CEP Forums

We have been asking Coral8 to create a user support forum on their website so that Coral8 users can ask questions of eachother. As long as the various financial institutions don't share their secret sauce, and as long as the discussions are purely technical, then I can imagine that all of the Coral8 users in the various IB's would participate. In particular, I would like to see the sharing of patterns, query optimizations, and adapter functionality.

©2008 Marc Adler - All Rights Reserved

Random Musings

Coral8 just released version 5.3. We asked them for a KDB+ adapter, and they delivered. It was our opinion that KDB+ is used so frequently in capital markets firms that it made perfect sense that the coral8 developer should be able to read data from KDB as easily as they can read data from Oracle or SQL Server. Right now, you still need to write Q queries in Coral8's KDB adapter in order to fetch data from KDB+ .... I was hoping for a way that a developer can write a simple SQL statement and have the KDB adapter translate the query into Q, but that will have to wait for a future version. We ask them to write this stuff and make it available in their core product in the hopes that a lot of people use it, debug it, and ask Coral8 for more enhancements. They did a very basic version, and it will be enhanced per customer demand.

Coral8 also released a Reuters market data adapter, and from what I understand, it will be offered as a separate product that costs a not-too-trivial amount of money. Our internal framework has built-in market data adapters, so we won't be leveraging the Coral8 adapter, especially since it seems like a very vanilla adapter.

It is good that Coral8 has become aware of all of the various adapters needed for firms that do trading. It has been about 6 months since we had to explain what a FIX message was to the Coral8 people, and they have caught on pretty quickly. Coral8 probably had the least amount of captial markets experience of any of the CEP vendors, and they are rapidly catching up.


I read with interest the "exciting announcement" that the banking products side of Aleri were bought by Wall Street Systems. I don't know quite what to make of this. Was this a much-needed infusion of capital? Does Aleri really want to concentrate solely on CEP? Was the banking side of Aleri under-performing? I had lunch yesterday with a vendor of products that are sold into the capital markets space, and the vendor mentioned that the main CEP products that are evaluated are Apama and Streambase, with Coral8 gaining more and more interest. Combined with the difficulty of seeling into capital markets right now, I wonder if this is the first shoe to drop at Aleri.

(A note to PR agencies and Aleri newsletter writers ... you must think that the lives of your readers are pretty drab if you consider the above announcement to be "exciting".... you need to get out of your offices and see what kind of party animals your potential customers are!)

On the plus side, Aleri seems to still be the only company willing to take the STAC challenge. Where are you, Coral8? Apama? Streambase? Esper?


Sprint 2 has completed, and we are starting up Sprint 3. One of the things that is weighing heavily on my mind is the subject of entitlements and Derived Events. Certain users should not see certain derived events at all. Certain users should only see the partial contents of certain derived events.

There is no standard entitlements framework out there. Every IB that I have been with has had multiple custom-built entitlement frameworks. Morgan Stanley had at least 3, and 2 years ago, there was a group that had just been formed in order to build the mother of all entitlement frameworks.

Our entitlements framework needs to work hand-in-hand with our message bus. Most CEP applications are fairly simplistic, and their output goes into a single system, so there is no need for entitlements. Other CEP applications want to publish everything out to everyone ... a surveillance-type CEP application might be crippled if its output is only going to the security guard who is in the middle of a doughnut break. We can't let the prop traders see agency flow, and vice-versa. We can't let certain people on a single desk see what others on the desk are doing ... but the head of the desk should be able to see everything, and if the head of the desk is on vacation, the notifications should be transmitted to a chain of proxies.

It would be great if this kind of feature was built into a CEP engine ... given a derived event and a list of fine-grained entitlements, produce a "recipient list" of what message bus topics we send the derived event to.





©2008 Marc Adler - All Rights Reserved