Necesito optimizar este query de MapReduce
Publicado por MFCO (1 intervención) el 27/05/2015 02:47:45
ste SQL me corre en 34 segundos, necesito que me corra en menos! Pero no se si lo que tengo que optimizar es la última parte donde esta el contador o antes! Debe ser com MapReduce.
----- PRIMER PASO
----- PRIMER PASO
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
IF NOT EXISTS (SELECT * FROM sys.types WHERE is_user_defined = 1 AND name = 'word_t')
CREATE TYPE word_t FROM nvarchar(32) NOT NULL;
IF NOT EXISTS (SELECT * FROM sys.types WHERE is_user_defined = 1 AND name = 'words_t')
CREATE TYPE words_t FROM nvarchar(max) NOT NULL;
IF NOT EXISTS (SELECT * FROM sys.types WHERE is_table_type = 1 AND name = 'wordcnts_t')
CREATE TYPE wordcnts_t AS TABLE ( words nvarchar(4000) NOT NULL, countofwords int );
IF NOT EXISTS (SELECT * FROM sys.types WHERE is_table_type = 1 AND name = 'DocumentsTable')
CREATE TYPE DocumentsTable AS TABLE (Content nvarchar(max) NOT NULL)
GO
IF NOT EXISTS (SELECT * FROM sys.types WHERE is_table_type = 1 AND name = 'DocumentsWithIndexTable')
CREATE TYPE DocumentsWithIndexTable AS TABLE (Indx int, Content nvarchar(max) NOT NULL)
GO
IF NOT EXISTS (SELECT * FROM sys.types WHERE is_table_type = 1 AND name = 'DocumentsWithAutoIncrementIndexTable')
CREATE TYPE DocumentsWithAutoIncrementIndexTable AS TABLE (
P_Id int PRIMARY KEY IDENTITY, Content nvarchar(max) NOT NULL)
GO
------ SEGUNDO PASO
drop function dbo.mapper
go
create function dbo.mapper(@documents db_owner.DocumentsWithAutoIncrementIndexTable READONLY, @sep nvarchar)
returns @t TABLE ( wordindex int, word db_owner.word_t )
WITH EXECUTE AS CALLER
as begin
declare @lastidprocessed int, @istart int
declare @len int, @wordcounter int, @id int, @pos int
declare @document db_owner.words_t, @word db_owner.word_t
select @lastidprocessed=0
While EXISTS(SELECT * From @documents WHERE @lastidprocessed < P_Id)
Begin
Select Top 1 @id = P_Id, @document = Content From @documents WHERE @lastidprocessed < P_Id
set @istart = 1;
set @len = LEN(@document);
-- For every word within a document
While (@istart <= @len)
Begin
set @pos = CHARINDEX ( @sep ,@document, @istart )
if (@pos = 0)
begin
set @word = SUBSTRING(@document, @istart, @len);
insert into @t values ( @wordcounter, @word )
set @istart = @len + 1;
set @wordcounter = @wordcounter + 1;
end
else
begin
set @word = SUBSTRING(@document, @istart, @pos - @istart);
insert into @t values ( @wordcounter, @word )
set @istart = @pos + 1;
set @wordcounter = @wordcounter + 1;
end
End
set @lastidprocessed = @id
End
Return
end
go
------ TERCER PASO
drop function dbo.reducer
go
create function dbo.reducer(@words db_owner.DocumentsWithAutoIncrementIndexTable READONLY)
returns @t TABLE ( word db_owner.word_t, wordcount int )
WITH EXECUTE AS CALLER
as BEGIN
declare @word db_owner.word_t, @previousword db_owner.word_t
declare @lastidprocessed integer, @id integer, @wordoccurrences integer
select @lastidprocessed=0
While EXISTS(SELECT * From @words WHERE @lastidprocessed < P_Id)
Begin
Select Top 1 @id = P_Id, @word = Content From @words WHERE @lastidprocessed < P_Id
-- For every word, see if it's the same as the previous one
if (@word <> @previousword)
begin
If (@previousword <> '')
begin
insert into @t values ( @previousword, @wordoccurrences );
end
set @wordoccurrences = 1;
end
else
begin
set @wordoccurrences = @wordoccurrences + 1;
end
set @previousword = @word;
set @lastidprocessed = @id
End
insert into @t values ( @previousword, @wordoccurrences );
Return
END
go
------ CUARTO PASO
declare @contador integer
select @contador=1
DECLARE @myDocuments db_owner.DocumentsWithAutoIncrementIndexTable
while @contador < 10000
begin
INSERT INTO @myDocuments VALUES('The quick brown fox jumps over the lazy dog');
INSERT INTO @myDocuments VALUES('The brown dog walks through the brown forest');
INSERT INTO @myDocuments VALUES('Little red riding hood looks like a fox in her red dress');
select @contador= @contador+1
end
DECLARE @myWords db_owner.DocumentsWithAutoIncrementIndexTable
INSERT INTO @myWords(Content) SELECT word FROM dbo.mapper(@myDocuments, ' ') order by word asc
select * from dbo.reducer(@myWords) order by wordcount desc
Valora esta pregunta


0