WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / YOU CANNOT LOG IN OR REGISTER ACCOUNTS HERE / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FIND SOMETHING THAT MAY NOT BE SUITABLE FOR EVERYONE, CONTACT THE ADMIN AT ilovescratch@foxmail.com
Skip to content

Commit c39e7ae

Browse files
committed
Separate Rules from main
1 parent 5655616 commit c39e7ae

File tree

4 files changed

+163
-161
lines changed

4 files changed

+163
-161
lines changed

.github/workflows/go.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ jobs:
3232
env:
3333
GOOS: ${{ matrix.goos }}
3434
GOARCH: ${{ matrix.goarch }}
35-
run: go build -v -o sanitizetelebot-${{ matrix.goos }}-${{ matrix.goarch }} sanitizetelebot.go
35+
run: go build -v -o sanitizetelebot-${{ matrix.goos }}-${{ matrix.goarch }} .
3636

3737
- name: Upload artifact
3838
uses: actions/upload-artifact@v4

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ docker run -d -e TELEGRAM_BOT_TOKEN=<your-token> mecoblock/sanitizetelebot
1010
```
1111
Alternatively, you can use the compose.yml:
1212
```
13-
version: "3.3"
1413
services:
1514
sanitizetelebot:
1615
image: mecoblock/sanitizetelebot
@@ -28,6 +27,6 @@ networks: {}
2827
# To build and run it yourself
2928
1. Get a Telegram Bot Token from BotFather
3029
2. Clone the Repo
31-
3. Open the Terminal in the project directory and type `go build`
30+
3. Open the Terminal in the project directory and type `go build .`
3231
4. Create a token.txt file and paste in your token from BotFather
3332
5. Run the executable

rules.go

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
package main
2+
3+
// DomainRules maps a host fragment to the query-parameter rules applied to
// URLs on that host. sanitizeURL tests each key with strings.Contains against
// the parsed URL's host, so a key such as "amazon" or "tiktok" matches every
// host that contains that text, not just one exact domain.
// NOTE(review): how the value strings are compared with parameter names is
// not visible in this diff — presumably as prefixes, like URLRules; confirm
// in sanitizeURL.
4+
var DomainRules = map[string][]string{
5+
"amazon": {"pd_rd_", "_encoding", "psc", "tag", "ref_", "pf_rd_", "pf", "crid"},
6+
"youtube.com": {"feature", "kw", "si", "pp"},
7+
"youtu.be": {"si"},
8+
"twitter.com": {"t", "s", "ref_"},
9+
"x.com": {"t", "s", "ref_"},
10+
"instagram.com": {"igshid"},
11+
"spotify.com": {"si"},
12+
"reddit.com": {"share_id"},
13+
"soundcloud.com": {"si"},
14+
"tiktok": {"_r", "_t"},
15+
}
16+
17+
// URLRules lists the query-parameter rules that sanitizeURL applies to every
// URL before the host-specific DomainRules pass. A query parameter is deleted
// when strings.HasPrefix(param, rule) is true for any entry, so a plain entry
// such as "utm_source" or "cid" removes every parameter whose name starts
// with that text.
// NOTE(review): entries written as "name@host" or containing "*" are compared
// literally by that prefix check; they only match a parameter whose name
// begins with the full text, "@" and "*" included. Confirm whether host
// scoping and wildcards were meant to be interpreted.
18+
var URLRules = []string{
19+
"action_object_map",
20+
"action_type_map",
21+
"action_ref_map",
22+
"spm@*.aliexpress.com",
23+
"scm@*.aliexpress.com",
24+
"aff_platform",
25+
"aff_trace_key",
26+
"algo_expid@*.aliexpress.*",
27+
"algo_pvid@*.aliexpress.*",
28+
"btsid",
29+
"ws_ab_test",
30+
"pd_rd_*@amazon.*",
31+
"_encoding@amazon.*",
32+
"psc@amazon.*",
33+
"tag@amazon.*",
34+
"ref_@amazon.*",
35+
"pf_rd_*@amazon.*",
36+
"pf@amazon.*",
37+
"crid@amazon.*",
38+
"keywords@amazon.*",
39+
"sprefix@amazon.*",
40+
"sr@amazon.*",
41+
"ie@amazon.*",
42+
"node@amazon.*",
43+
"qid@amazon.*",
44+
45+
46+
47+
48+
49+
50+
51+
52+
"sc_cid",
53+
"mkt_tok",
54+
"trk",
55+
"trkCampaign",
56+
"ga_*",
57+
"gclid",
58+
"gclsrc",
59+
"hmb_campaign",
60+
"hmb_medium",
61+
"hmb_source",
62+
"spReportId",
63+
"spJobID",
64+
"spUserID",
65+
"spMailingID",
66+
"itm_*",
67+
"s_cid",
68+
"elqTrackId",
69+
"elqTrack",
70+
"assetType",
71+
"assetId",
72+
"recipientId",
73+
"campaignId",
74+
"siteId",
75+
"mc_cid",
76+
"mc_eid",
77+
"pk_*",
78+
"sc_campaign",
79+
"sc_channel",
80+
"sc_content",
81+
"sc_medium",
82+
"sc_outcome",
83+
"sc_geo",
84+
"sc_country",
85+
"nr_email_referer",
86+
"vero_conv",
87+
"vero_id",
88+
"yclid",
89+
"_openstat",
90+
"mbid",
91+
"cmpid",
92+
"cid",
93+
"c_id",
94+
"campaign_id",
95+
"Campaign",
96+
"hash@ebay.*",
97+
"fb_action_ids",
98+
"fb_action_types",
99+
"fb_ref",
100+
"fb_source",
101+
"fbclid",
102+
103+
104+
"gs_l",
105+
"gs_lcp@google.*",
106+
"ved@google.*",
107+
"ei@google.*",
108+
"sei@google.*",
109+
"gws_rd@google.*",
110+
"gs_gbg@google.*",
111+
"gs_mss@google.*",
112+
"gs_rn@google.*",
113+
"_hsenc",
114+
"_hsmi",
115+
"__hssc",
116+
"__hstc",
117+
"hsCtaTracking",
118+
119+
120+
"t@*.twitter.com",
121+
"s@*.twitter.com",
122+
"ref_*@*.twitter.com",
123+
"t@*.x.com",
124+
"s@*.x.com",
125+
"ref_*@*.x.com",
126+
"t@*.fixupx.com",
127+
"s@*.fixupx.com",
128+
"ref_*@*.fixupx.com",
129+
"t@*.fxtwitter.com",
130+
"s@*.fxtwitter.com",
131+
"ref_*@*.fxtwitter.com",
132+
"t@*.twittpr.com",
133+
"s@*.twittpr.com",
134+
"ref_*@*.twittpr.com",
135+
"t@*.fixvx.com",
136+
"s@*.fixvx.com",
137+
"ref_*@*.fixvx.com",
138+
"tt_medium",
139+
"tt_content",
140+
"lr@yandex.*",
141+
"redircnt@yandex.*",
142+
"feature@*.youtube.com",
143+
"kw@*.youtube.com",
144+
"si@*.youtube.com",
145+
"pp@*.youtube.com",
146+
"si@*.youtu.be",
147+
"wt_zmc",
148+
"utm_source",
149+
"utm_content",
150+
"utm_medium",
151+
"utm_campaign",
152+
"utm_term",
153+
154+
"igshid",
155+
"igsh",
156+
157+
158+
}

sanitizetelebot.go

Lines changed: 3 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -239,167 +239,12 @@ func sanitizeURL(text string) (string, bool, bool, []string, []string, error) {
239239
continue
240240
}
241241

242-
// Create query parameter rules based on defaultRules.ts
243-
universalRules := []string{
244-
"action_object_map",
245-
"action_type_map",
246-
"action_ref_map",
247-
"spm@*.aliexpress.com",
248-
"scm@*.aliexpress.com",
249-
"aff_platform",
250-
"aff_trace_key",
251-
"algo_expid@*.aliexpress.*",
252-
"algo_pvid@*.aliexpress.*",
253-
"btsid",
254-
"ws_ab_test",
255-
"pd_rd_*@amazon.*",
256-
"_encoding@amazon.*",
257-
"psc@amazon.*",
258-
"tag@amazon.*",
259-
"ref_@amazon.*",
260-
"pf_rd_*@amazon.*",
261-
"pf@amazon.*",
262-
"crid@amazon.*",
263-
"keywords@amazon.*",
264-
"sprefix@amazon.*",
265-
"sr@amazon.*",
266-
"ie@amazon.*",
267-
"node@amazon.*",
268-
"qid@amazon.*",
269-
270-
271-
272-
273-
274-
275-
276-
277-
"sc_cid",
278-
"mkt_tok",
279-
"trk",
280-
"trkCampaign",
281-
"ga_*",
282-
"gclid",
283-
"gclsrc",
284-
"hmb_campaign",
285-
"hmb_medium",
286-
"hmb_source",
287-
"spReportId",
288-
"spJobID",
289-
"spUserID",
290-
"spMailingID",
291-
"itm_*",
292-
"s_cid",
293-
"elqTrackId",
294-
"elqTrack",
295-
"assetType",
296-
"assetId",
297-
"recipientId",
298-
"campaignId",
299-
"siteId",
300-
"mc_cid",
301-
"mc_eid",
302-
"pk_*",
303-
"sc_campaign",
304-
"sc_channel",
305-
"sc_content",
306-
"sc_medium",
307-
"sc_outcome",
308-
"sc_geo",
309-
"sc_country",
310-
"nr_email_referer",
311-
"vero_conv",
312-
"vero_id",
313-
"yclid",
314-
"_openstat",
315-
"mbid",
316-
"cmpid",
317-
"cid",
318-
"c_id",
319-
"campaign_id",
320-
"Campaign",
321-
"hash@ebay.*",
322-
"fb_action_ids",
323-
"fb_action_types",
324-
"fb_ref",
325-
"fb_source",
326-
"fbclid",
327-
328-
329-
"gs_l",
330-
"gs_lcp@google.*",
331-
"ved@google.*",
332-
"ei@google.*",
333-
"sei@google.*",
334-
"gws_rd@google.*",
335-
"gs_gbg@google.*",
336-
"gs_mss@google.*",
337-
"gs_rn@google.*",
338-
"_hsenc",
339-
"_hsmi",
340-
"__hssc",
341-
"__hstc",
342-
"hsCtaTracking",
343-
344-
345-
"t@*.twitter.com",
346-
"s@*.twitter.com",
347-
"ref_*@*.twitter.com",
348-
"t@*.x.com",
349-
"s@*.x.com",
350-
"ref_*@*.x.com",
351-
"t@*.fixupx.com",
352-
"s@*.fixupx.com",
353-
"ref_*@*.fixupx.com",
354-
"t@*.fxtwitter.com",
355-
"s@*.fxtwitter.com",
356-
"ref_*@*.fxtwitter.com",
357-
"t@*.twittpr.com",
358-
"s@*.twittpr.com",
359-
"ref_*@*.twittpr.com",
360-
"t@*.fixvx.com",
361-
"s@*.fixvx.com",
362-
"ref_*@*.fixvx.com",
363-
"tt_medium",
364-
"tt_content",
365-
"lr@yandex.*",
366-
"redircnt@yandex.*",
367-
"feature@*.youtube.com",
368-
"kw@*.youtube.com",
369-
"si@*.youtube.com",
370-
"pp@*.youtube.com",
371-
"si@*.youtu.be",
372-
"wt_zmc",
373-
"utm_source",
374-
"utm_content",
375-
"utm_medium",
376-
"utm_campaign",
377-
"utm_term",
378-
379-
"igshid",
380-
"igsh",
381-
382-
383-
}
384-
385-
// Host-specific rules
386-
hostRules := map[string][]string{
387-
"amazon": {"pd_rd_", "_encoding", "psc", "tag", "ref_", "pf_rd_", "pf", "crid"},
388-
"youtube.com": {"feature", "kw", "si", "pp"},
389-
"youtu.be": {"si"},
390-
"twitter.com": {"t", "s", "ref_"},
391-
"x.com": {"t", "s", "ref_"},
392-
"instagram.com": {"igshid"},
393-
"spotify.com": {"si"},
394-
"reddit.com": {"share_id"},
395-
"soundcloud.com": {"si"},
396-
"tiktok": {"_r", "_t"},
397-
}
242+
// Use universal rules from rules.go
398243

399244
// Clean universal parameters
400245
q := parsedURL.Query()
401246
for param := range q {
402-
for _, rule := range universalRules {
247+
for _, rule := range URLRules {
403248
if strings.HasPrefix(param, rule) {
404249
q.Del(param)
405250
sanitized = true
@@ -408,7 +253,7 @@ func sanitizeURL(text string) (string, bool, bool, []string, []string, error) {
408253
}
409254

410255
// Clean host-specific parameters
411-
for host, rules := range hostRules {
256+
for host, rules := range DomainRules {
412257
if strings.Contains(parsedURL.Host, host) {
413258
for param := range q {
414259
for _, rule := range rules {

0 commit comments

Comments
 (0)