FrequentWords(Text, k) FrequentPatterns ← an empty set for i ← 0 to |Text| − k Pattern ← the k-mer Text(i, k) Count(i) ← PatternCount(Text, Pattern) maxCount ← maximum value in array Count for i ← 0 to |Text| − k if Count(i) = maxCount add Text(i, k) to FrequentPatterns remove duplicates from FrequentPatterns return FrequentPatterns```
```python k = 3 Text = 'CGATATATCCATAG' def FrequencyMap(Text, k): freq = {} n = len(Text) for i in range(n-k+1): Pattern = Text[i:i+k] freq[Pattern] = 0 for i in range (n-k+1): if Text[i:i+k] == Pattern: freq[Pattern] = freq[Pattern] + 1 return freq print(FrequencyMap(Text, k))
defFrequentWords(Text, k): words = [] freq = FrequencyMap(Text, k) m = max(freq.values()) for key in freq: if freq[key] == m: words.append(key) return words k = 4 Text = 'ACGTTGCATGTCGCATGATGCATGAGAGCT' FrequentWords(Text, k)
# Input: A string Text and an integer k # Output: A list containing all most frequent k-mers in Text defFrequentWords(Text, k): words = [] freq = FrequencyMap(Text, k) m = max(freq.values()) for key in freq: if freq[key] == m: words.append(key) return words # Copy your FrequencyMap() function here. defFrequencyMap(Text, k): freq = {} n = len(Text) for i in range(n-k+1): Pattern = Text[i:i+k] freq[Pattern] = 0 for i in range (n-k+1): if Text[i:i+k] == Pattern: freq[Pattern] = freq[Pattern] + 1 return freq
f = open('v_cholerae_oric.txt', 'r') ori = f.read() Text = 'ACGTTGCATGTCGCATGATGCATGAGAGCT' k = 10 FrequentWords(ori, k)
['CTCTTGATCA', 'TCTTGATCAT']
1
f.close()
1 2 3 4
f = open('v_cholerae_oric.txt', 'r') ori = f.read() print(ori) f.close()
1.3 Some Hidden Messages are More Surprising than Others
1 2 3 4 5 6 7 8
defReverse(Pattern): rev = "" for char in Pattern: rev += Pattern[::-1] return rev Pattern = 'AAAACCCGGT' Reverse(Pattern)
'TGGCCCAAAA'
1 2 3 4 5 6 7
Pattern = "ACGT" rev = "" for i in range(len(Pattern)-1): print(i)
0
1
2
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
#else if defComplement(Pattern): com = "" for i in range(len(Pattern)): if Pattern[i] == "A": com += "T" elif Pattern[i] == "T": com += "A" elif Pattern[i] == "C": com += "G" elif Pattern[i] == "G": com += "C" return com
Pattern = "AAAACCCGGT" Complement(Pattern)
'TTTTGGGCCA'
1 2 3 4 5 6 7 8
defComplement(Pattern): basepairs = {"A":"T", "G":"C", "C":"G", "T":"A"} comp = "" for base in Pattern: comp += basepairs.get(base) return comp Pattern = "AAAACCCGGT" Complement(Pattern)
'TTTTGGGCCA'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
# Input: A DNA string Pattern # Output: The reverse complement of Pattern defReverseComplement(Pattern): return Reverse(Complement(Pattern)) # Copy your Reverse() function here. defReverse(Pattern): rev = "" for char in Pattern: rev += Pattern[::-1] return rev # Copy your Complement() function here. defComplement(Pattern): basepairs = {"A":"T", "G":"C", "C":"G", "T":"A"} comp = "" for base in Pattern: comp += basepairs.get(base) return comp Pattern = "AAAACCCGGT" ReverseComplement(Pattern)
'ACCGGGTTTT'
1 2 3 4 5 6 7 8 9 10 11
defPatternMatching(Pattern, Genome): positions = [] # output variable # your code here for i in range(len(Genome)-len(Pattern)+1): if Genome[i:i+len(Pattern)] == Pattern: positions.append(i) return positions Pattern = "ATAT" Genome = "GATATATGCATATACTT" PatternMatching(Pattern, Genome)
[1, 3, 9]
1 2 3 4 5 6 7 8 9 10 11 12
defPatternMatching(Pattern, Genome): positions = [] # output variable # your code here for i in range(len(Genome)-len(Pattern)+1): if Genome[i:i+len(Pattern)] == Pattern: positions.append(i) return positions f = open("Vibrio_cholerae.txt", "r") Genome = f.read() Pattern = "CTTGATCAT" PatternMatching(Pattern, Genome)
# Copy your PatternCount function below here defPatternCount(Text, Pattern): count = 0 for i in range(len(Text)-len(Pattern)+1): if Text[i:i+len(Pattern)] == Pattern: count = count + 1 return count
# On the following line, create a variable called Text that is equal to the oriC region from T petrophila Text = "AACTCTATACCTCCTTTTTGTCGAATTTGTGTGATTTATAGAGAAAATCTTATTAACTGAAACTAAAATGGTAGGTTTGGTGGTAGGTTTTGTGTACATTTTGTAGTATCTGATTTTTAATTACATACCGTATATTGTATTAAATTGACGAACAATTGCATGGAATTGAATATATGCAAAACAAACCTACCACCAAACTCTGTATTGACCATTTTAGGACAACTTCAGGGTGGTAGGTTTCTGAAGCTCTCATCAATAGACTATTTTAGTCTTTACAAACAATATTACCGTTCAGATTCAAGATTCTACAACGCTGTTTTAATGGGCGTTGCAGAAAACTTACCACCTAAAATCCAGTATCCAAGCCGATTTCAGAGAAACCTACCACTTACCTACCACTTACCTACCACCCGGGTGGTAAGTTGCAGACATTATTAAAAACCTCATCAGAAGCTTGTTCAAAAATTTCAATACTCGAAACCTACCACCTGCGTCCCCTATTATTTACTACTACTAATAATAGCAGTATAATTGATCTGA"
# On the following line, create a variable called count_1 that is equal to the number of times # that "ATGATCAAG" occurs in Text. Pattern1 = 'ATGATCAAG'
# On the following line, create a variable called count_2 that is equal to the number of times # that "CTTGATCAT" occurs in Text. Pattern2 = 'ATGATCAAG'
# Finally, print the sum of count_1 and count_2 print(PatternCount(Text, Pattern1)+PatternCount(Text, Pattern1))
0
Exercise
1 2 3 4 5 6 7 8 9
Text = "CGCGATACGTTACATACATGATAGACCGCGCGCGATCATATCGCGATTATC" Pattern = 'CGCG' defPatternCount(Text, Pattern): count = 0 for i in range(len(Text)-len(Pattern)+1): if Text[i:i+len(Pattern)] == Pattern: count = count + 1 return count PatternCount(Text, Pattern)