1 | class Statistics |
---|
2 | |
---|
3 | attr_reader :start_time, |
---|
4 | :range |
---|
5 | |
---|
6 | def initialize |
---|
7 | #@start_time = start_time |
---|
8 | #@range = range |
---|
9 | end |
---|
10 | |
---|
11 | def posts_count_sql |
---|
12 | <<SQL |
---|
13 | SELECT users.id AS id, count(posts.id) AS count |
---|
14 | FROM users |
---|
15 | JOIN people ON people.owner_id = users.id |
---|
16 | LEFT OUTER JOIN posts ON people.id = posts.author_id |
---|
17 | #{self.where_clause_sql} |
---|
18 | GROUP BY users.id |
---|
19 | SQL |
---|
20 | end |
---|
21 | |
---|
22 | def comments_count_sql |
---|
23 | <<SQL |
---|
24 | SELECT users.id AS id, count(comments.id) AS count |
---|
25 | FROM users |
---|
26 | JOIN people ON people.owner_id = users.id |
---|
27 | LEFT OUTER JOIN comments ON people.id = comments.author_id |
---|
28 | #{self.where_clause_sql} |
---|
29 | GROUP BY users.id |
---|
30 | SQL |
---|
31 | end |
---|
32 | |
---|
33 | def invites_sent_count_sql |
---|
34 | <<SQL |
---|
35 | SELECT users.id AS id, count(invitations.id) AS count |
---|
36 | FROM users |
---|
37 | LEFT OUTER JOIN invitations ON users.id = invitations.sender_id |
---|
38 | #{self.where_clause_sql} |
---|
39 | GROUP BY users.id |
---|
40 | SQL |
---|
41 | end |
---|
42 | |
---|
43 | def tags_followed_count_sql |
---|
44 | <<SQL |
---|
45 | SELECT users.id AS id, count(tag_followings.id) AS count |
---|
46 | FROM users |
---|
47 | LEFT OUTER JOIN tag_followings on users.id = tag_followings.user_id |
---|
48 | #{self.where_clause_sql} |
---|
49 | GROUP BY users.id |
---|
50 | SQL |
---|
51 | end |
---|
52 | |
---|
53 | def mentions_count_sql |
---|
54 | <<SQL |
---|
55 | SELECT users.id AS id, count(mentions.id) AS count |
---|
56 | FROM users |
---|
57 | JOIN people on users.id = people.owner_id |
---|
58 | LEFT OUTER JOIN mentions on people.id = mentions.person_id |
---|
59 | #{self.where_clause_sql} |
---|
60 | GROUP BY users.id |
---|
61 | SQL |
---|
62 | end |
---|
63 | |
---|
64 | def contacts_sharing_with_count_sql |
---|
65 | <<SQL |
---|
66 | SELECT users.id AS id, count(contacts.id) AS count |
---|
67 | FROM users |
---|
68 | JOIN contacts on contacts.user_id = users.id |
---|
69 | JOIN aspect_memberships on aspect_memberships.contact_id = contacts.id |
---|
70 | #{self.where_clause_sql} |
---|
71 | GROUP BY users.id |
---|
72 | SQL |
---|
73 | end |
---|
74 | |
---|
75 | def fb_connected_distribution_sql |
---|
76 | <<SQL |
---|
77 | SELECT users.id AS id, users.sign_in_count AS count, count(services.id) AS connected |
---|
78 | FROM users |
---|
79 | LEFT OUTER JOIN services on services.user_id = users.id |
---|
80 | AND services.type = 'Services::Facebook' |
---|
81 | #{self.where_clause_sql} |
---|
82 | GROUP BY users.id, users.sign_in_count |
---|
83 | SQL |
---|
84 | end |
---|
85 | |
---|
86 | def fb_connected_distribution |
---|
87 | User.connection.select_all(fb_connected_distribution_sql).map { |row| |
---|
88 | Hash[ |
---|
89 | row.map { |k,v| |
---|
90 | [k, v.to_i] |
---|
91 | } |
---|
92 | ] |
---|
93 | } |
---|
94 | end |
---|
95 | |
---|
96 | def sign_in_count_sql |
---|
97 | <<SQL |
---|
98 | SELECT users.id AS id, users.sign_in_count AS count |
---|
99 | FROM users |
---|
100 | #{self.where_clause_sql} |
---|
101 | SQL |
---|
102 | end |
---|
103 | |
---|
104 | def correlate(first_metric, second_metric) |
---|
105 | |
---|
106 | # [{"id" => 1 , "count" => 123}] |
---|
107 | |
---|
108 | x_array = [] |
---|
109 | y_array = [] |
---|
110 | |
---|
111 | self.result_hash(first_metric).keys.each do |k| |
---|
112 | if val = self.result_hash(second_metric)[k] |
---|
113 | x_array << self.result_hash(first_metric)[k] |
---|
114 | y_array << val |
---|
115 | end |
---|
116 | end |
---|
117 | |
---|
118 | correlation(x_array, y_array) |
---|
119 | end |
---|
120 | |
---|
121 | def generate_correlations |
---|
122 | result = {} |
---|
123 | [:posts_count, :comments_count, :invites_sent_count, #:tags_followed_count, |
---|
124 | :mentions_count, :contacts_sharing_with_count].each do |metric| |
---|
125 | result[metric] = self.correlate(metric,:sign_in_count) |
---|
126 | end |
---|
127 | result |
---|
128 | end |
---|
129 | |
---|
130 | def correlation(x_array, y_array) |
---|
131 | sum = 0.0 |
---|
132 | x_array.each_index do |i| |
---|
133 | sum = sum + x_array[i].to_f * y_array[i].to_f |
---|
134 | end |
---|
135 | x_y_mean = sum/x_array.length.to_f |
---|
136 | x_mean = mean(x_array) |
---|
137 | y_mean = mean(y_array) |
---|
138 | |
---|
139 | st_dev_x = standard_deviation(x_array) |
---|
140 | st_dev_y = standard_deviation(y_array) |
---|
141 | |
---|
142 | (x_y_mean - (x_mean*y_mean))/(st_dev_x * st_dev_y) |
---|
143 | end |
---|
144 | |
---|
145 | def mean(array) |
---|
146 | sum = array.inject(0.0) do |sum, val| |
---|
147 | sum += val.to_f |
---|
148 | end |
---|
149 | sum / array.length |
---|
150 | end |
---|
151 | |
---|
152 | def standard_deviation(array) |
---|
153 | variance = lambda do |
---|
154 | m = mean(array) |
---|
155 | sum = 0.0 |
---|
156 | array.each{ |v| sum += (v.to_f-m)**2 } |
---|
157 | sum/array.length.to_f |
---|
158 | end.call |
---|
159 | |
---|
160 | Math.sqrt(variance) |
---|
161 | end |
---|
162 | |
---|
163 | ### % of cohort came back last week |
---|
164 | def retention(n) |
---|
165 | users_by_week(n).count.to_f/week_created(n).count |
---|
166 | end |
---|
167 | |
---|
168 | def top_active_users(n) |
---|
169 | ten_percent_lim = (users_by_week(n).count.to_f * 0.3).ceil |
---|
170 | users_by_week(n).joins(:person => :profile).where('users.sign_in_count > 4').order("users.sign_in_count DESC").limit(ten_percent_lim).select('users.email, users.username, profiles.first_name, users.sign_in_count') |
---|
171 | end |
---|
172 | |
---|
173 | def users_by_week(n) |
---|
174 | week_created(n).where("current_sign_in_at > ?", Time.now - 1.week) |
---|
175 | end |
---|
176 | |
---|
177 | protected |
---|
178 | def where_clause_sql |
---|
179 | if AppConfig.postgres? |
---|
180 | "WHERE users.created_at > NOW() - '1 month'::INTERVAL" |
---|
181 | else |
---|
182 | "where users.created_at > FROM_UNIXTIME(#{(Time.now - 1.month).to_i})" |
---|
183 | end |
---|
184 | end |
---|
185 | |
---|
186 | def week_created(n) |
---|
187 | User.where("username IS NOT NULL").where("users.created_at > ? and users.created_at < ?", Time.now - (n+1).weeks, Time.now - n.weeks) |
---|
188 | end |
---|
189 | |
---|
190 | #@param [Symbol] input type |
---|
191 | #@returns [Hash] of resulting query |
---|
192 | def result_hash(type) |
---|
193 | instance_hash = self.instance_variable_get("@#{type.to_s}_hash".to_sym) |
---|
194 | unless instance_hash |
---|
195 | post_count_array = User.connection.select_all(self.send("#{type.to_s}_sql".to_sym)) |
---|
196 | |
---|
197 | instance_hash = {} |
---|
198 | post_count_array.each{ |h| instance_hash[h['id']] = h["count"]} |
---|
199 | self.instance_variable_set("@#{type.to_s}_hash".to_sym, instance_hash) |
---|
200 | end |
---|
201 | instance_hash |
---|
202 | end |
---|
203 | end |
---|