The script to convert Arabic and Persian text is ready (Python, tested on Ubuntu):
#!/usr/bin/python
# coding=utf-8
import sys
import unicodedata
class ArabicCharacter(object):
    """Contextual glyph forms of a single character.

    The glyph attributes are named ``c<prev><next>``: the first digit is 1
    when the glyph connects to the preceding character of the text, the
    second digit is 1 when it connects to the following one.

    Attributes:
        c00: glyph used when the character connects to neither neighbour.
        c01: glyph used when it connects to the following character only.
        c11: glyph used when it connects to both neighbours.
        c10: glyph used when it connects to the preceding character only.
        left: 1 if the character may connect to the preceding one, else 0.
        right: 1 if the character may connect to the following one, else 0.
        specialRight: maps a following character to the pair
            ``[glyph when not connected to the preceding character,
            glyph when connected to it]`` that replaces both characters.

    A freshly constructed instance connects to nothing; call one of the
    ``*Ligature`` methods to register its connected forms.
    """

    def __init__(self, c00):
        """Create a non-connecting character whose stand-alone glyph is c00."""
        self.c00 = c00
        # Unused forms hold a blank placeholder until a *Ligature method
        # fills them in.
        self.c10 = u' '
        self.c01 = u' '
        self.c11 = u' '
        self.left = 0
        self.right = 0
        # Created per instance so that characters never share one dict.
        self.specialRight = {}

    def LeftRightLigature(self, c01, c11, c10):
        """Register a character that connects on both sides.

        Args:
            c01: glyph connected to the following character only.
            c11: glyph connected to both neighbours.
            c10: glyph connected to the preceding character only.
        """
        self.c10 = c10
        self.c01 = c01
        self.c11 = c11
        self.left = 1
        self.right = 1

    def LeftLigature(self, c10):
        """Register a character that connects to the preceding one only.

        Args:
            c10: glyph connected to the preceding character.
        """
        self.c10 = c10
        # Such a character never connects forward, so the two forward-joined
        # forms are reset to the blank placeholder.
        self.c01 = u' '
        self.c11 = u' '
        self.left = 1
        self.right = 0

    def LeftLigatureFromRight(self, cNext, c0, c1):
        """Register a combined glyph for this character followed by cNext.

        Args:
            cNext: the following character that triggers the combination.
            c0: combined glyph when not connected to the preceding character.
            c1: combined glyph when connected to the preceding character.
        """
        self.specialRight[cNext] = [c0, c1]
# Glyph tables for the supported characters.
#
# The glyphs are written as \u escapes on purpose: the contextual forms of a
# letter look almost identical in most editors, which makes a wrong entry
# very hard to spot. In the Unicode presentation-form blocks the forms of a
# letter are consecutive code points in the order isolated, final, initial,
# medial, so every row below must simply count upwards.

# Characters that never connect to a neighbour; they are emitted unchanged.
_NON_JOINING = (
    u' ',
    u'\u061F',  # Arabic question mark
)

# Letters that connect to the preceding letter only.
# Each row: (letter, isolated + final glyph).
_RIGHT_JOINING = (
    (u'\u0622', u'\uFE81\uFE82'),  # alef with madda above
    (u'\u0623', u'\uFE83\uFE84'),  # alef with hamza above
    (u'\u0624', u'\uFE85\uFE86'),  # waw with hamza above
    (u'\u0625', u'\uFE87\uFE88'),  # alef with hamza below
    (u'\u0627', u'\uFE8D\uFE8E'),  # alef
    (u'\u0629', u'\uFE93\uFE94'),  # teh marbuta
    (u'\u062F', u'\uFEA9\uFEAA'),  # dal
    (u'\u0630', u'\uFEAB\uFEAC'),  # thal
    (u'\u0631', u'\uFEAD\uFEAE'),  # reh
    (u'\u0632', u'\uFEAF\uFEB0'),  # zain
    (u'\u0648', u'\uFEED\uFEEE'),  # waw
    (u'\u0649', u'\uFEEF\uFEF0'),  # alef maksura
    (u'\u0698', u'\uFB8A\uFB8B'),  # jeh (Persian)
)

# Letters that connect on both sides.
# Each row: (letter, isolated + final + initial + medial glyph).
_DUAL_JOINING = (
    (u'\u0626', u'\uFE89\uFE8A\uFE8B\uFE8C'),  # yeh with hamza above
    (u'\u0628', u'\uFE8F\uFE90\uFE91\uFE92'),  # beh
    (u'\u062A', u'\uFE95\uFE96\uFE97\uFE98'),  # teh
    (u'\u062B', u'\uFE99\uFE9A\uFE9B\uFE9C'),  # theh
    (u'\u062C', u'\uFE9D\uFE9E\uFE9F\uFEA0'),  # jeem
    (u'\u062D', u'\uFEA1\uFEA2\uFEA3\uFEA4'),  # hah
    (u'\u062E', u'\uFEA5\uFEA6\uFEA7\uFEA8'),  # khah
    (u'\u0633', u'\uFEB1\uFEB2\uFEB3\uFEB4'),  # seen
    (u'\u0634', u'\uFEB5\uFEB6\uFEB7\uFEB8'),  # sheen
    (u'\u0635', u'\uFEB9\uFEBA\uFEBB\uFEBC'),  # sad
    (u'\u0636', u'\uFEBD\uFEBE\uFEBF\uFEC0'),  # dad
    (u'\u0637', u'\uFEC1\uFEC2\uFEC3\uFEC4'),  # tah
    (u'\u0638', u'\uFEC5\uFEC6\uFEC7\uFEC8'),  # zah
    (u'\u0639', u'\uFEC9\uFECA\uFECB\uFECC'),  # ain
    (u'\u063A', u'\uFECD\uFECE\uFECF\uFED0'),  # ghain
    (u'\u0641', u'\uFED1\uFED2\uFED3\uFED4'),  # feh
    (u'\u0642', u'\uFED5\uFED6\uFED7\uFED8'),  # qaf
    (u'\u0643', u'\uFED9\uFEDA\uFEDB\uFEDC'),  # kaf
    (u'\u0644', u'\uFEDD\uFEDE\uFEDF\uFEE0'),  # lam
    (u'\u0645', u'\uFEE1\uFEE2\uFEE3\uFEE4'),  # meem
    (u'\u0646', u'\uFEE5\uFEE6\uFEE7\uFEE8'),  # noon
    (u'\u0647', u'\uFEE9\uFEEA\uFEEB\uFEEC'),  # heh
    (u'\u064A', u'\uFEF1\uFEF2\uFEF3\uFEF4'),  # yeh
    (u'\u067E', u'\uFB56\uFB57\uFB58\uFB59'),  # peh (Persian)
    (u'\u0686', u'\uFB7A\uFB7B\uFB7C\uFB7D'),  # tcheh (Persian)
    (u'\u06AF', u'\uFB92\uFB93\uFB94\uFB95'),  # gaf (Persian)
)

# Combined glyphs for lam followed by an alef variant.
# Each row: (alef variant, isolated + final glyph of the combination).
_LAM_ALEF = (
    (u'\u0622', u'\uFEF5\uFEF6'),  # lam + alef with madda above
    (u'\u0623', u'\uFEF7\uFEF8'),  # lam + alef with hamza above
    (u'\u0625', u'\uFEF9\uFEFA'),  # lam + alef with hamza below
    (u'\u0627', u'\uFEFB\uFEFC'),  # lam + alef
)


def _buildAlphabet():
    """Return the dict mapping each supported character to its ArabicCharacter."""
    table = {}
    for char in _NON_JOINING:
        table[char] = ArabicCharacter(char)
    for letter, forms in _RIGHT_JOINING:
        isolated, final = forms
        entry = ArabicCharacter(isolated)
        entry.LeftLigature(final)
        table[letter] = entry
    for letter, forms in _DUAL_JOINING:
        isolated, final, initial, medial = forms
        entry = ArabicCharacter(isolated)
        # LeftRightLigature expects (c01, c11, c10), i.e. the glyph joined to
        # the following letter, to both letters, to the preceding letter.
        entry.LeftRightLigature(initial, medial, final)
        table[letter] = entry
    lam = table[u'\u0644']
    for alef, forms in _LAM_ALEF:
        isolated, final = forms
        lam.LeftLigatureFromRight(alef, isolated, final)
    return table


# Every key is exactly one code point, because the lookup in
# generateLigature is done one character at a time.
alphabet = _buildAlphabet()
def generateLigature(prevC, currC, nextC):
    """Choose the glyph for ``currC`` according to its two neighbours.

    Args:
        prevC: character preceding ``currC`` in the input text.
        currC: character to convert.
        nextC: character following ``currC`` in the input text.

    Returns:
        A two-item list ``[newChar, skip]``. ``newChar`` is the glyph to
        emit; ``skip`` is the number of following input characters that
        ``newChar`` already covers and that the caller must not convert
        again (1 when a combined glyph from ``specialRight`` is used,
        otherwise 0).

    A character missing from ``alphabet`` is returned unchanged and a
    warning is written to stderr, so diagnostics stay out of the converted
    text printed on stdout.
    """
    currCarabic = alphabet.get(currC)
    if currCarabic is None:
        sys.stderr.write(
            u'Warning: the character <' + currC + u'> is not arabic\n')
        return [currC, 0]

    # Connected to the preceding character only when that character can
    # connect forward and this one can connect backward.
    prevCarabic = alphabet.get(prevC)
    joinsPrev = bool(
        prevCarabic is not None and prevCarabic.right and currCarabic.left)

    # A registered pair (currC, nextC) collapses into one combined glyph;
    # entry 1 is the variant connected to the preceding character.
    specialRight = currCarabic.specialRight.get(nextC)
    if specialRight is not None:
        return [specialRight[1] if joinsPrev else specialRight[0], 1]

    nextCarabic = alphabet.get(nextC)
    joinsNext = bool(
        nextCarabic is not None and nextCarabic.left and currCarabic.right)

    if joinsPrev and joinsNext:
        return [currCarabic.c11, 0]
    if joinsNext:
        return [currCarabic.c01, 0]
    if joinsPrev:
        return [currCarabic.c10, 0]
    return [currCarabic.c00, 0]
# Classes returned by unicodedata.bidirectional() for characters that are
# read left to right (letters, European and Arabic numbers) ...
_LTR_CLASSES = frozenset(('L', 'EN', 'AN'))
# ... and for characters that are read right to left.
_RTL_CLASSES = frozenset(('R', 'AL'))


def _restoreLtrRuns(reversedTxt):
    """Put left-to-right runs of an already reversed text back in order.

    Reversing a whole line also reverses numbers and Latin words embedded in
    it ("123" would be shown as "321"). Every maximal span that starts and
    ends on a left-to-right character and contains no right-to-left
    character is therefore reversed a second time. This is a deliberate
    simplification, not a full implementation of the Unicode bidirectional
    algorithm.
    """
    chars = list(reversedTxt)
    count = len(chars)
    start = 0
    while start < count:
        if unicodedata.bidirectional(chars[start]) not in _LTR_CLASSES:
            start += 1
            continue
        # Grow the span across neutral characters (spaces, punctuation) but
        # remember the last left-to-right character, so trailing neutrals
        # stay where the surrounding right-to-left text put them.
        last = start
        probe = start + 1
        while probe < count:
            bidiClass = unicodedata.bidirectional(chars[probe])
            if bidiClass in _RTL_CLASSES:
                break
            if bidiClass in _LTR_CLASSES:
                last = probe
            probe += 1
        chars[start:last + 1] = chars[start:last + 1][::-1]
        start = last + 1
    return u''.join(chars)


def convertText(inputTxt):
    """Convert logical-order text into glyphs ordered for left-to-right display.

    Args:
        inputTxt: unicode text in logical (typing) order.

    Returns:
        The text with every character replaced by its contextual glyph and
        the character order reversed, except for embedded left-to-right
        runs, which keep their reading order.
    """
    shaped = []
    skip = 0
    lastIndex = len(inputTxt) - 1
    for i, currC in enumerate(inputTxt):
        if skip > 0:
            # This character is already covered by the previous glyph.
            skip -= 1
            continue
        # Text boundaries behave like a space: nothing to connect to.
        prevC = inputTxt[i - 1] if i > 0 else u' '
        nextC = inputTxt[i + 1] if i < lastIndex else u' '
        newC, skip = generateLigature(prevC, currC, nextC)
        shaped.append(newC)
    return _restoreLtrRuns(u''.join(shaped)[::-1])


def main(argv):
    """Command-line entry point; returns the process exit status.

    Args:
        argv: the program name followed by exactly one argument, the text
            to convert.

    Returns:
        0 on success, 1 when the argument count is wrong.
    """
    if len(argv) != 2:
        print('Help: ./teeworlds-arabic-converter.py <TEXT_IN_ARABIC>')
        return 1
    inputTxt = argv[1]
    if isinstance(inputTxt, bytes):
        # Python 2 passes arguments as encoded byte strings; Python 3
        # already provides text.
        inputTxt = inputTxt.decode('utf-8')
    print(u'Input: ' + inputTxt)
    print(u'Output: ' + convertText(inputTxt))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))