[.NET MVC] Receiving text data from a third-party request that uses a different encoding (Shift_JIS → UTF-8)

#region SHIFT_JIS CODE MAP
// Lookup tables taken from http://charset.7jp.net/jis0208.html.
// The three arrays are index-aligned: DecodeShiftJs finds a position in one array and reads the
// same position from another (ShiftJisCode -> ShiftJisString, ShiftJisString -> JisCode).
// NOTE(review): the "____" / "__5F" entries look like placeholders for a character that has no
// two-byte code in the table - confirm against the source table before relying on them.
// Four-character hex strings; compared against the code assembled from the percent-escapes.
private static readonly string[] ShiftJisCode = { "8145", "____", "8140", "8141", "8142", "8143", "8144", "8146", ... and so on };
// Four-character hex strings; DecodeShiftJs uses only the last two characters of an entry.
private static readonly string[] JisCode = { "2126", "__5F", "2121", "2122", "2123", "2124", "2125", "2127", "2128", ... and so on };
// The decoded character for each table position.
private static readonly string[] ShiftJisString = { "・", "_", " ", "、", "。", ",", ".", ":", ";", "?", "!", "゛", ... and so on };
#endregion
/// <summary>
/// Decodes a URL-encoded string whose percent-escapes are looked up in the
/// <c>ShiftJisCode</c> / <c>JisCode</c> / <c>ShiftJisString</c> tables instead of being
/// interpreted as UTF-8.
/// </summary>
/// <param name="value">
/// The encoded text. '+' is treated as a word separator and '%' introduces a two-character hex byte.
/// </param>
/// <returns>
/// <paramref name="value"/> unchanged when it is <c>null</c> or contains no '%'; otherwise the
/// decoded words joined by single spaces, with trailing whitespace trimmed.
/// </returns>
/// <remarks>
/// The scanner repeatedly rewrites <c>splitedCode[i]</c> and steps <c>i</c> backwards to re-read a
/// segment, so the order of statements below matters.
/// NOTE(review): known hazards that are left as they were and should be confirmed with tests:
/// (1) a two-character segment that is the last of several reads <c>splitedCode[i + 1]</c> past the
/// end of the array; (2) <c>Substring(3)</c> in the default branch throws for segments shorter than
/// three characters; (3) when both bytes of a pair decode to the replacement character the two
/// decrements appear to send the scan back to the same (or an earlier) segment - verify termination
/// on malformed input.
/// NOTE(review): where a Shift_JIS <c>Encoding</c> is available, <c>HttpUtility.UrlDecode(string, Encoding)</c>
/// may be able to replace this hand-rolled decoder - evaluate separately.
/// </remarks>
public static string DecodeShiftJs(string value) {
 if (value == null ||
  !value.Contains('%')) {
  return value;
 }

 // '+' separates words; consecutive '+' collapse because empty entries are dropped.
 var splitedBySpace = value.Split(new [] {
  '+'
 }, StringSplitOptions.RemoveEmptyEntries);

 var output = string.Empty;

 // Literal (not percent-encoded) text that trails the code currently being decoded.
 var utf8Part = string.Empty;

 // Index in splitedCode at which the next character code is expected to start.
 var nextScanPosition = 0;

 var isHtmlUrlEncodedChar = false;
 var htmlUrlDecodedChar = string.Empty;

 foreach(var part in splitedBySpace) {
  var splitedCode = part.Split(new [] {
   '%'
  }, StringSplitOptions.RemoveEmptyEntries);

  // A word without any escape is copied through verbatim.
  if (!part.Contains("%")) {
   output += part + ' ';
   nextScanPosition = 0;

   continue;
  }

  for (var i = 0; i < splitedCode.Length; i++) {
   // Four-character code assembled for the table lookup further down.
   var codeSet = string.Empty;

   switch (splitedCode[i].Length) {
    case 2:
     // Exactly one hex byte in this segment.
     if (splitedCode.Length == 1) {
      // The word consists of a single escape: decode it on its own.
      isHtmlUrlEncodedChar = true;
      htmlUrlDecodedChar = WebUtility.UrlDecode("%" + splitedCode[i]);
     } else {
      // Pair this byte with the first two characters of the following segment;
      // whatever follows those two characters is kept as literal text.
      var nextSplitedCode = splitedCode[i + 1];
      if (nextSplitedCode.Length > 2) {
       utf8Part = nextSplitedCode.Substring(2);
       nextSplitedCode = nextSplitedCode.Substring(0, 2);
      }

      codeSet = splitedCode[i] + nextSplitedCode;
      nextScanPosition = i + 2;
     }

     // Skip the segment consumed as the second byte.
     i++;
     break;
    case 3:
     if (nextScanPosition == i) {
      // Two hex characters followed by one literal character. The literal character is
      // mapped through ConvertUtf8AlphaNumericToShiftJis and located in ShiftJisString;
      // the last two characters of the JisCode entry at that position become the second
      // half of the code.
      // NOTE(review): presumably those two characters equal the character's ASCII hex
      // value - confirm against the table.
      var lastDigit = splitedCode[i][2].ToString();

      var lastDigitPos = ShiftJisString.Select((val, id) => new {
        val,
        id
       })
       .FirstOrDefault(x => string.Equals(x.val,
        ConvertUtf8AlphaNumericToShiftJis(lastDigit),
        StringComparison.InvariantCulture));

      if (lastDigitPos != null) {
       var jisCode = JisCode[lastDigitPos.id];

       codeSet = splitedCode[i].Remove(2) + jisCode.Remove(0, jisCode.Length - 2);
      }

      nextScanPosition++;
     } else {
      // Not at a code boundary: keep the third character as literal text, shrink the
      // segment to its two hex characters and re-read it on the next pass.
      utf8Part = splitedCode[i].Substring(2);
      splitedCode[i] = splitedCode[i].Remove(2);
      i--;
     }
     break;
    default:
     // Any other segment length: cut the segment down to three or two characters, stash the
     // remainder as literal text and re-read the shortened segment on the next pass.
     if (i == 0 || splitedCode[i - 1].Length > 2) {
      utf8Part = splitedCode[i].Substring(3);
      splitedCode[i] = splitedCode[i].Remove(3);
      i--;
     } else {
      if (nextScanPosition == i) {
       utf8Part = splitedCode[i].Substring(3);
       splitedCode[i] = splitedCode[i].Remove(3);
      } else {
       utf8Part = splitedCode[i].Substring(2);
       splitedCode[i] = splitedCode[i].Remove(2);
      }
      i--;
     }

     break;
   }
   if (isHtmlUrlEncodedChar) {
    output += htmlUrlDecodedChar;

    // Reset.
    isHtmlUrlEncodedChar = false;
    htmlUrlDecodedChar = string.Empty;
   } else {
    // Find the assembled code in the table; the match position indexes ShiftJisString.
    var shiftJis = ShiftJisCode.Select((code, index) => new {
      code,
      index
     })
     .FirstOrDefault(x => string.Equals(x.code,
      codeSet,
      StringComparison.InvariantCulture));

    if (shiftJis != null) {
     output += ShiftJisString[shiftJis.index] + utf8Part;

     // Reset.
     utf8Part = string.Empty;
    } else {
     // Not in the table: decode the code two characters (one byte) at a time.
     var codeSetToUrlEncodedString = codeSet;
     while (codeSet.Length >= 2) {
      var urlDecodedValue = WebUtility.UrlDecode("%" + codeSet.Substring(0, 2));

      if (urlDecodedValue == "�") {
       // The byte did not decode to a usable character on its own: step the scan back
       // one segment and drop the pending literal text.
       i--;
       nextScanPosition--;

       // Reset.
       utf8Part = string.Empty;
      } else {
       output += urlDecodedValue;
      }

      codeSet = codeSet.Remove(0, 2);
     }

     // Only flush the literal text when a code was actually assembled in this pass.
     if (!string.IsNullOrEmpty(codeSetToUrlEncodedString)) {
      output += utf8Part;

      // Reset.
      utf8Part = string.Empty;
     }
    }
   }
  }

  // Re-insert the word separator that the '+' split removed.
  output += ' ';
  nextScanPosition = 0;
 }

 output = output.TrimEnd();

 return output;
}
Advertisements