I'm using 'text' instead of char. That seemed to cut the bloat down by about
30%. There's only about 11 tables in the DB. I included them at the bottom
in case you're interested.
Perhaps there's just something fundamentally boneheaded about my design
(re-reading "Database Design for Mere Mortals" as I write).
-Tony
CREATE SEQUENCE FileIndicies_fileID_seq;
CREATE TABLE FileIndicies (
fileID integer DEFAULT nextval('FileIndicies_fileID_seq') UNIQUE NOT
NULL, --PK
szFileName text NOT NULL,
szFileType text,
CreationDate date,
CreationTime time,
dtimestampresolution float4,
dtimespan float4,
szAppName text,
szFileComment text,
PRIMARY KEY (fileID),
UNIQUE (szFileName, szFileType, CreationDate, CreationTime)
);
CREATE SEQUENCE EntityFile_dbIndex_seq;
CREATE TABLE EntityFile (
EntityID integer, --PK
fileID integer REFERENCES FileIndicies (fileID),
dbIndex integer DEFAULT nextval('EntityFile_dbIndex_seq') UNIQUE NOT
NULL,
PRIMARY KEY (fileID, EntityID)
);
-- AnalogIndex
CREATE TABLE AnalogIndicies (
dbIndex integer REFERENCES EntityFile (dbIndex),
dsamplerate float4 CHECK (dsamplerate > (0)::float4),
dminval float4,
dmaxval float4 CHECK (dmaxval >= dminval),
szunits text,
dresolution float4 CHECK (dresolution > (0)::float4),
dlocationx float4,
dlocationy float4,
dlocationz float4,
dlocationuser float4,
dhighfreqcorner float4 CHECK (dhighfreqcorner >= (0)::float4),
dwhighfreqorder float4 CHECK (dwhighfreqorder >= (0)::float4),
szhighfiltertype text,
dlowfreqcorner float4 CHECK (dlowfreqcorner >= (0)::float4),
dwlowfreqorder float4 CHECK (dwlowfreqorder >= (0)::float4),
szlowfiltertype float4,
szprobeinfo text,
PRIMARY KEY (dbIndex)
);
CREATE TABLE AnalogValues (
dbIndex integer REFERENCES EntityFile (dbIndex),
dtimestamp float4 NOT NULL,
dvalue float4,
PRIMARY KEY (dbIndex, dtimestamp)
);
CREATE TABLE EventIndicies (
dbIndex integer REFERENCES EntityFile (dbIndex),
dweventtype smallint CHECK (dweventtype >=0 AND dweventtype <= 4),
dwmindatalength smallint CHECK (dwmindatalength > (0)::float4),
dwmaxdatalength smallint CHECK (dwmaxdatalength >= dwmindatalength),
szcsvdesc text,
PRIMARY KEY (dbIndex)
);
CREATE TABLE EventValues (
dbIndex integer REFERENCES EntityFile (dbIndex),
dtimestamp float4,
dwdatabytesize smallint CHECK (dwdatabytesize > (0)::float4),
eventvalue text,
PRIMARY KEY (dbIndex, dtimestamp)
);
CREATE TABLE NeuralIndicies (
dbIndex integer REFERENCES EntityFile (dbIndex),
dwsourceentityid smallint,
dwsourceunitid smallint,
szprobeinfo text,
PRIMARY KEY (dbIndex)
);
CREATE TABLE NeuralValues (
dbIndex integer REFERENCES EntityFile (dbIndex),
dtimestamp float4 NOT NULL,
PRIMARY KEY (dbIndex, dtimestamp)
);
CREATE TABLE SegmentIndicies (
dbIndex integer REFERENCES EntityFile (dbIndex),
dwsourcecount smallint CHECK (dwsourcecount > (0)::float4),
dwminsamplecount smallint CHECK (dwminsamplecount > (0)::float4),
dwmaxsamplecount smallint CHECK (dwmaxsamplecount > (0)::float4),
dsamplerate float4 CHECK (dsamplerate > (0)::float4),
szunits text,
PRIMARY KEY (dbIndex)
);
CREATE TABLE SegmentSourceIndicies (
dbIndex integer REFERENCES EntityFile (dbIndex),
dwsourceid integer CHECK (dwsourceid >= 0),
dminval float4,
dmaxval float4 CHECK (dmaxval >= dminval),
dresolution float4 CHECK (dresolution > (0)::float4),
dsubsampleshift float4,
dlocationx float4,
dlocationy float4,
dlocationz float4,
dlocationuser float4,
dhighfreqcorner float4 CHECK (dhighfreqcorner >= (0)::float4),
dwhighfreqorder float4 CHECK (dwhighfreqorder >= (0)::float4),
szhighfiltertype text,
dlowfreqcorner float4 CHECK (dlowfreqcorner >= (0)::float4),
dwlowfreqorder float4 CHECK (dwlowfreqorder >= (0)::float4),
szlowfiltertype text,
szprobeinfo text,
PRIMARY KEY (dbIndex, dwsourceid)
);
CREATE TABLE SegmentValues (
dbIndex integer REFERENCES EntityFile (dbIndex),
dwunitid smallint,
dwsampleindex smallint,
dtimestamp float4,
dvalue float4,
PRIMARY KEY (dbIndex, dtimestamp, dwsampleindex, dwunitid)
);
CREATE VIEW SegmentData AS
SELECT a.szFileName, a.szFileType, a.creationdate, a.creationtime,
c.EntityID,
b.dwunitid, b.dwsampleindex, b.dtimestamp, b.dvalue
FROM FileIndicies AS a, SegmentValues AS b, EntityFile AS c
WHERE
b.dbIndex = c.dbIndex AND
a.fileID = c.fileID
ORDER BY a.szFileName, a.szFileType, a.creationdate, a.creationtime,
c.EntityID,
b.dwunitid, b.dwsampleindex, b.dtimestamp;
----- Original Message -----
From: "Douglas Trainor" <***@uic.edu>
To: "Tony Reina" <***@hotmail.com>
Cc: <pgsql-***@postgresql.org>
Sent: Thursday, April 08, 2004 1:41 PM
Subject: Re: [ADMIN] Database storage bloat
Post by Douglas TrainorSaying "we've set field sizes to their theoretical skinniness" makes me
think that
you may have the wrong data types. For example, you may have used CHAR
and not VARCHAR.
douglas
Post by Tony ReinaI'm developing a database for scientific recordings. These recordings
are traditionally saved as binary flat files for simplicity and
compact storage. Although I think ultimately having a database is
better than 1,000s of flat files in terms of data access, I've found
that the database (or at least my design) is pretty wasteful on
storage space compared with the binary flat files.
In particular, I tried importing all of the data from a binary flat
file that is 1.35 MB into a PostgreSQL database (a very small test
file; average production file is probably more like 100 MB). The
database directory ballooned from 4.1 MB to 92 MB (a bloat of 65X the
original storage of the binary flat file).
Now I know that table design and normalizing is important. As far as
my partner and I can tell, we've made good use of normalizing (no
redundancy), we've set field sizes to their theoretical skinniness,
[...]
---------------------------(end of broadcast)---------------------------
TIP 5: Have you checked our extensive FAQ?
http://www.postgresql.org/docs/faqs/FAQ.html