bajapro/online-compiler/compiler/views.py
2025-06-07 16:18:13 +07:00

425 lines
16 KiB
Python

import os
from django.http import JsonResponse
from lib.FileUploader import FileUploader
from lib.java_runner import JavaRunner
from django.views.decorators.csrf import csrf_exempt
from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer, util
import torch
import re
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
import nltk
import logging
# Module-level logger: records grading activity both to grade_generator.log
# and to the console, at INFO level, with a timestamped format.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Single formatter shared by both handlers.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# File and console handlers, configured identically.
file_handler = logging.FileHandler('grade_generator.log')
console_handler = logging.StreamHandler()
for handler in (file_handler, console_handler):
    handler.setLevel(logging.INFO)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
# ...
# NOTE(review): this is a truncated stub of generate_grade; the full
# definition later in this module rebinds the name at import time, so this
# version is never the one routed to. As written, `normalized_value` is never
# assigned in this body, so reaching the return would raise NameError, which
# the broad except clause catches — every call would take the 500 error path.
@csrf_exempt
def generate_grade(request):
    # ...
    try:
        # ...
        logger.info('Grade generated successfully')
        # Success path: respond with the computed (normalized) grade.
        return JsonResponse({
            'output': normalized_value
        }, status=200)
    except Exception as e:
        # Failure path: log and surface the error message as a 500 response.
        logger.error(f'Error generating grade: {str(e)}')
        return JsonResponse({
            'error': str(e)
        }, status=500)
# Fetch the NLTK resources the grader needs (tokenizers + WordNet data).
# nltk.download skips resources that are already installed and up to date,
# so this only hits the network on first run; quiet=True suppresses the
# per-resource progress chatter that otherwise floods startup output.
for _nltk_resource in ('punkt', 'wordnet', 'omw-1.4', 'punkt_tab'):
    nltk.download(_nltk_resource, quiet=True)
from nltk.corpus import wordnet
import numpy as np
from scipy.stats import pearsonr
@csrf_exempt
def index(request):
    """Compile and run submitted Java code in the user's working directory.

    Expects POST fields `user` (per-user directory name) and `code` (Java
    source text). Returns the runner's result under the `output` key.
    """
    runner = JavaRunner(
        user_directory=request.POST["user"],
        code=request.POST["code"],
    )
    return JsonResponse({'output': runner.run()}, status=200)
@csrf_exempt
def upload_java_test_file(request):
    """Persist the uploaded Java test file via FileUploader.

    Expects a multipart POST with a `file` part; always answers with a
    `{"status": "ok"}` JSON payload on success.
    """
    uploaded = request.FILES["file"]
    # NOTE(review): the uploaded file object is passed as both `filename` and
    # `file`. That may rely on the object stringifying to its name — confirm
    # whether FileUploader actually expects `uploaded.name` for `filename`.
    FileUploader(filename=uploaded, file=uploaded).upload()
    return JsonResponse({"status": "ok"}, status=200)
def get_test_file_list(request):
    """Return the names of all uploaded Java test-case files as JSON.

    `request` is unused beyond routing (the old debug `print(request)` was
    removed); the response lists the contents of java_files/test_cases.
    """
    return JsonResponse({
        "file_list": os.listdir("java_files/test_cases")
    }, status=200)
@csrf_exempt
def delete_test(request):
    """Delete an uploaded Java test-case file by name.

    Expects POST field `filename`. Returns a success JSON payload, or a
    500 JSON payload when the file does not exist.
    """
    filename = request.POST["filename"]
    # Security: `filename` comes from an untrusted POST body. Strip any
    # directory components so a crafted value like "../../settings.py"
    # cannot delete files outside the test-case directory.
    filename = os.path.basename(filename)
    try:
        os.remove(os.path.join("java_files/test_cases", filename))
    except FileNotFoundError as err:
        return JsonResponse({"error": str(err), "status": "failed"}, status=500)
    return JsonResponse({"message": "{} deleted".format(filename), "status": "success"}, status=200)
# --- grading helpers --------------------------------------------------------

def _get_model():
    """Lazily load and cache the sentence-transformer tokenizer and model.

    The original code reloaded the HuggingFace model on every request, which
    is by far the dominant cost of this view; caching on the function object
    makes the load happen once per process.
    """
    if not hasattr(_get_model, "_cached"):
        name = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'
        _get_model._cached = (
            AutoTokenizer.from_pretrained(name),
            AutoModel.from_pretrained(name),
        )
    return _get_model._cached


def _preprocess(text, stemmer):
    """Lower-case, strip non-essential punctuation, tokenize and stem a sentence.

    Punctuation characters that can be meaningful in code answers
    (+ = < > * & % -) are deliberately kept. Only alphabetic tokens are
    stemmed (Sastrawi, Indonesian); other tokens pass through unchanged.
    """
    text = re.sub(r'[^\w\s+=<>*&%-]', '', text.lower())
    tokens = nltk.word_tokenize(text)
    stemmed = [stemmer.stem(tok) if tok.isalpha() else tok for tok in tokens]
    return ' '.join(stemmed)


def _mean_pooling(model_output, attention_mask):
    """Average token embeddings, weighting by the attention mask.

    Padding positions are excluded from the mean; the clamp guards against
    division by zero for an all-padding (empty) input.
    """
    token_embeddings = model_output.last_hidden_state
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)


def _embed(sentence, tokenizer, model):
    """Encode one sentence into a single mean-pooled embedding tensor."""
    encoded = tokenizer(sentence, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        output = model(**encoded)
    return _mean_pooling(output, encoded['attention_mask'])


def _normalize_cosine_similarity(cosine_similarity, new_min=0, new_max=20):
    """Min-max rescale a cosine similarity from [0, 1] to [new_min, new_max].

    Fixes a latent bug in the original: the function ignored its own
    parameter and closed over an outer variable. Returns the rounded
    integer grade.
    """
    X_min, X_max = 0, 1
    normalized = (cosine_similarity - X_min) / (X_max - X_min) * (new_max - new_min) + new_min
    return round(normalized)


def _mae(y_true, predictions):
    """Mean Absolute Error between two arrays."""
    return np.mean(np.abs(np.subtract(y_true, predictions)))


def _mse(y_true, predictions):
    """Mean Squared Error between two arrays."""
    return np.mean(np.square(np.subtract(y_true, predictions)))


def _pearson_correlation(y_true, predictions):
    """Pearson correlation coefficient: covariance over product of std devs."""
    covariance = np.mean((y_true - np.mean(y_true)) * (predictions - np.mean(predictions)))
    return covariance / (np.std(y_true) * np.std(predictions))


@csrf_exempt
def generate_grade(request):
    """Grade a free-text answer against four reference essay answers.

    Expects POST fields `user_answer` plus `esay_answer`, `esay_answer2`,
    `esay_answer3`, `esay_answer4`. Each sentence is lower-cased, stripped
    of non-essential punctuation, tokenized and stemmed (Sastrawi), then
    embedded with a multilingual sentence transformer. The best cosine
    similarity between the user's answer and any reference is min-max
    rescaled to an integer grade in [0, 20] and returned as JSON
    `{"output": grade}` with status 200.
    """
    references = [
        request.POST["esay_answer"],
        request.POST["esay_answer2"],
        request.POST["esay_answer3"],
        request.POST["esay_answer4"],
    ]
    user_answer = request.POST["user_answer"]

    # Text normalisation: lower-case, punctuation strip, tokenize, stem.
    stemmer = StemmerFactory().create_stemmer()
    user_sentence = _preprocess(user_answer, stemmer)
    reference_sentences = [_preprocess(ref, stemmer) for ref in references]

    # Embed every sentence with the (cached) sentence-transformer model.
    tokenizer, model = _get_model()
    user_embedding = _embed(user_sentence, tokenizer, model)
    reference_embeddings = [_embed(s, tokenizer, model) for s in reference_sentences]

    # The best cosine similarity across the four references decides the grade.
    cosine_similarities = {}
    max_cosine_similarity = float('-inf')
    max_cosine_similarity_index = None
    for i, embedding in enumerate(reference_embeddings, start=2):
        score = util.cos_sim(user_embedding, embedding)[0][0].item()
        cosine_similarities[f'sentence_embeddings{i}'] = score
        if score > max_cosine_similarity:
            max_cosine_similarity = score
            max_cosine_similarity_index = i
    logger.info('Cosine similarity %s', cosine_similarities)
    logger.info('Max Cosine similarity %s: %s',
                max_cosine_similarity_index, max_cosine_similarity)

    normalized_value = _normalize_cosine_similarity(max_cosine_similarity)
    logger.info('Nilai yang sudah dinormalisasi dan dibulatkan ke rentang [0, 20]: %s',
                normalized_value)

    # Per-reference regression-style diagnostics, logged for analysis only —
    # they do not influence the returned grade.
    true = np.array(reference_embeddings)
    predicted = np.array([user_embedding] * len(reference_embeddings))
    for i in range(len(true)):
        logger.info('MAE for pair %d: %s', i + 2, _mae(true[i], predicted[i]))
        logger.info('MSE for pair %d: %s', i + 2, _mse(true[i], predicted[i]))
        logger.info('Koefisien Korelasi Pearson %d: %s',
                    i + 2, _pearson_correlation(true[i], predicted[i]))

    return JsonResponse({
        'output': normalized_value
    }, status=200)